{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# python数据分析之Pandas-3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**导入包**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**读入数据**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "XLRDError",
     "evalue": "Excel xlsx file; not supported",
     "output_type": "error",
     "traceback": [
      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[0;31mXLRDError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[0;32m/var/folders/fj/074djdr13178c4hpdlwt37r00000gp/T/ipykernel_11662/1192663001.py\u001B[0m in \u001B[0;36m<module>\u001B[0;34m\u001B[0m\n\u001B[0;32m----> 1\u001B[0;31m \u001B[0mdf\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mpd\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mread_excel\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34mr'/Users/feiyi-lgh/Documents/projectSrc/pythonSrc/个人学习/python-tutorial/python_常用工具/python数据分析/5.python数据分析之Pandas-3/movie_data3.xlsx'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mindex_col\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;36m0\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m",
      "\u001B[0;32m~/opt/anaconda3/envs/py37-aiops/lib/python3.7/site-packages/pandas/io/excel/_base.py\u001B[0m in \u001B[0;36mread_excel\u001B[0;34m(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, **kwds)\u001B[0m\n\u001B[1;32m    302\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    303\u001B[0m     \u001B[0;32mif\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0misinstance\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mio\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mExcelFile\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 304\u001B[0;31m         \u001B[0mio\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mExcelFile\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mio\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mengine\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mengine\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    305\u001B[0m     \u001B[0;32melif\u001B[0m \u001B[0mengine\u001B[0m \u001B[0;32mand\u001B[0m \u001B[0mengine\u001B[0m \u001B[0;34m!=\u001B[0m \u001B[0mio\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mengine\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    306\u001B[0m         raise ValueError(\n",
      "\u001B[0;32m~/opt/anaconda3/envs/py37-aiops/lib/python3.7/site-packages/pandas/io/excel/_base.py\u001B[0m in \u001B[0;36m__init__\u001B[0;34m(self, io, engine)\u001B[0m\n\u001B[1;32m    822\u001B[0m         \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_io\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mstringify_path\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mio\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    823\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 824\u001B[0;31m         \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_reader\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_engines\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mengine\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_io\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    825\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    826\u001B[0m     \u001B[0;32mdef\u001B[0m \u001B[0m__fspath__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
      "\u001B[0;32m~/opt/anaconda3/envs/py37-aiops/lib/python3.7/site-packages/pandas/io/excel/_xlrd.py\u001B[0m in \u001B[0;36m__init__\u001B[0;34m(self, filepath_or_buffer)\u001B[0m\n\u001B[1;32m     19\u001B[0m         \u001B[0merr_msg\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m\"Install xlrd >= 1.0.0 for Excel support\"\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     20\u001B[0m         \u001B[0mimport_optional_dependency\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"xlrd\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mextra\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0merr_msg\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 21\u001B[0;31m         \u001B[0msuper\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__init__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfilepath_or_buffer\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m     22\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     23\u001B[0m     \u001B[0;34m@\u001B[0m\u001B[0mproperty\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
      "\u001B[0;32m~/opt/anaconda3/envs/py37-aiops/lib/python3.7/site-packages/pandas/io/excel/_base.py\u001B[0m in \u001B[0;36m__init__\u001B[0;34m(self, filepath_or_buffer)\u001B[0m\n\u001B[1;32m    351\u001B[0m             \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mbook\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mload_workbook\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfilepath_or_buffer\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    352\u001B[0m         \u001B[0;32melif\u001B[0m \u001B[0misinstance\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfilepath_or_buffer\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mstr\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 353\u001B[0;31m             \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mbook\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mload_workbook\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfilepath_or_buffer\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    354\u001B[0m         \u001B[0;32melif\u001B[0m \u001B[0misinstance\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfilepath_or_buffer\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mbytes\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    355\u001B[0m             \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mbook\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mload_workbook\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mBytesIO\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfilepath_or_buffer\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
      "\u001B[0;32m~/opt/anaconda3/envs/py37-aiops/lib/python3.7/site-packages/pandas/io/excel/_xlrd.py\u001B[0m in \u001B[0;36mload_workbook\u001B[0;34m(self, filepath_or_buffer)\u001B[0m\n\u001B[1;32m     34\u001B[0m             \u001B[0;32mreturn\u001B[0m \u001B[0mopen_workbook\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfile_contents\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mdata\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     35\u001B[0m         \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 36\u001B[0;31m             \u001B[0;32mreturn\u001B[0m \u001B[0mopen_workbook\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfilepath_or_buffer\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m     37\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     38\u001B[0m     \u001B[0;34m@\u001B[0m\u001B[0mproperty\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
      "\u001B[0;32m~/opt/anaconda3/envs/py37-aiops/lib/python3.7/site-packages/xlrd/__init__.py\u001B[0m in \u001B[0;36mopen_workbook\u001B[0;34m(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows, ignore_workbook_corruption)\u001B[0m\n\u001B[1;32m    168\u001B[0m     \u001B[0;31m# files that xlrd can parse don't start with the expected signature.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    169\u001B[0m     \u001B[0;32mif\u001B[0m \u001B[0mfile_format\u001B[0m \u001B[0;32mand\u001B[0m \u001B[0mfile_format\u001B[0m \u001B[0;34m!=\u001B[0m \u001B[0;34m'xls'\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 170\u001B[0;31m         \u001B[0;32mraise\u001B[0m \u001B[0mXLRDError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mFILE_FORMAT_DESCRIPTIONS\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mfile_format\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m+\u001B[0m\u001B[0;34m'; not supported'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    171\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    172\u001B[0m     bk = open_workbook_xls(\n",
      "\u001B[0;31mXLRDError\u001B[0m: Excel xlsx file; not supported"
     ]
    }
   ],
   "source": [
    "# xlrd 2.x reads only legacy .xls workbooks and raises XLRDError on .xlsx files,\n",
    "# so the openpyxl engine is selected explicitly (the openpyxl package must be installed).\n",
    "# NOTE(review): the absolute path below is machine-specific; adjust it to wherever movie_data3.xlsx lives.\n",
    "df = pd.read_excel(r'/Users/feiyi-lgh/Documents/projectSrc/pythonSrc/个人学习/python-tutorial/python_常用工具/python数据分析/5.python数据分析之Pandas-3/movie_data3.xlsx', index_col=0, engine='openpyxl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'df' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[0;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[0;32m/var/folders/fj/074djdr13178c4hpdlwt37r00000gp/T/ipykernel_11662/3654452192.py\u001B[0m in \u001B[0;36m<module>\u001B[0;34m\u001B[0m\n\u001B[0;32m----> 1\u001B[0;31m \u001B[0mdf\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;36m5\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m",
      "\u001B[0;31mNameError\u001B[0m: name 'df' is not defined"
     ]
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.1 数据重塑和轴向旋转 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### (1)层次化索引 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "层次化索引是pandas的一项重要功能，它能使我们在一个轴上拥有多个索引。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Series的层次化索引： "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a  1    1\n",
       "   2    2\n",
       "   3    3\n",
       "b  1    4\n",
       "   2    5\n",
       "c  3    6\n",
       "   1    7\n",
       "d  2    8\n",
       "   3    9\n",
       "dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series(np.arange(1,10), index = [['a','a','a','b','b','c','c','d','d'], [1,2,3,1,2,3,1,2,3]])\n",
    "s #类似于合并单元格"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultiIndex([('a', 1),\n",
       "            ('a', 2),\n",
       "            ('a', 3),\n",
       "            ('b', 1),\n",
       "            ('b', 2),\n",
       "            ('c', 3),\n",
       "            ('c', 1),\n",
       "            ('d', 2),\n",
       "            ('d', 3)],\n",
       "           )"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    1\n",
       "2    2\n",
       "3    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s['a'] #外层索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a  1    1\n",
       "   2    2\n",
       "   3    3\n",
       "b  1    4\n",
       "   2    5\n",
       "c  3    6\n",
       "   1    7\n",
       "dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s['a':'c'] #切片"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    1\n",
       "b    4\n",
       "c    7\n",
       "dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s[:,1] #内层索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s['c',3] #提取具体的值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**通过unstack方法可以将Series变成一个DataFrame**\n",
    "![pic](d5eff56ef6b2701728fee6e8927ec62.jpg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     1    2    3\n",
       "a  1.0  2.0  3.0\n",
       "b  4.0  5.0  NaN\n",
       "c  7.0  NaN  6.0\n",
       "d  NaN  8.0  9.0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.unstack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a  1    1.0\n",
       "   2    2.0\n",
       "   3    3.0\n",
       "b  1    4.0\n",
       "   2    5.0\n",
       "c  1    7.0\n",
       "   3    6.0\n",
       "d  2    8.0\n",
       "   3    9.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.unstack().stack() #形式上的相互转换"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### DataFrame的层次化索引： "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "对于DataFrame来说，行和列都能进行层次化索引。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Z</th>\n",
       "      <th>X</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
       "      <th>1</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A       B\n",
       "     Z   X   C\n",
       "a 1  0   1   2\n",
       "  2  3   4   5\n",
       "b 1  6   7   8\n",
       "  2  9  10  11"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.DataFrame(np.arange(12).reshape(4,3), index = [['a','a','b','b'],[1,2,1,2]], columns = [['A','A','B'],['Z','X','C']])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Z</th>\n",
       "      <th>X</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
       "      <th>1</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Z   X\n",
       "a 1  0   1\n",
       "  2  3   4\n",
       "b 1  6   7\n",
       "  2  9  10"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['A']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>col1</th>\n",
       "      <th colspan=\"2\" halign=\"left\">A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>col2</th>\n",
       "      <th>Z</th>\n",
       "      <th>X</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row1</th>\n",
       "      <th>row2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
       "      <th>1</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "col1       A       B\n",
       "col2       Z   X   C\n",
       "row1 row2           \n",
       "a    1     0   1   2\n",
       "     2     3   4   5\n",
       "b    1     6   7   8\n",
       "     2     9  10  11"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.index.names = [\"row1\",\"row2\"]\n",
    "data.columns.names = [\"col1\", \"col2\"]\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>col1</th>\n",
       "      <th colspan=\"2\" halign=\"left\">A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>col2</th>\n",
       "      <th>Z</th>\n",
       "      <th>X</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row2</th>\n",
       "      <th>row1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <th>a</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <th>b</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <th>b</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "col1       A       B\n",
       "col2       Z   X   C\n",
       "row2 row1           \n",
       "1    a     0   1   2\n",
       "2    a     3   4   5\n",
       "1    b     6   7   8\n",
       "2    b     9  10  11"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.swaplevel(\"row1\",\"row2\") #位置调整"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 了解了层次化索引的基本知识之后，我们试着将电影数据也处理成一种多层索引的结构。 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Int64Index([    0,     1,     2,     3,     4,     5,     6,     7,     8,\n",
       "                9,\n",
       "            ...\n",
       "            38153, 38154, 38155, 38156, 38157, 38158, 38159, 38160, 38161,\n",
       "            38162],\n",
       "           dtype='int64', length=38163)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.index #默认索引"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 把产地和年代同时设成索引，产地是外层索引，年代为内层索引。 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### set_index可以把列变成索引\n",
    "\n",
    "#### reset_index是把索引变成列 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>产地</th>\n",
       "      <th>年代</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">美国</th>\n",
       "      <th>1994</th>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.600000</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1957</th>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>意大利</th>\n",
       "      <th>1997</th>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <th>1994</th>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国大陆</th>\n",
       "      <th>1993</th>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <th>1935</th>\n",
       "      <td>1935年</td>\n",
       "      <td>57</td>\n",
       "      <td>喜剧/歌舞</td>\n",
       "      <td>1935-03-15 00:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>7.600000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">中国大陆</th>\n",
       "      <th>1986</th>\n",
       "      <td>血溅画屏</td>\n",
       "      <td>95</td>\n",
       "      <td>剧情/悬疑/犯罪/武侠/古装</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>91</td>\n",
       "      <td>7.100000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1986</th>\n",
       "      <td>魔窟中的幻想</td>\n",
       "      <td>51</td>\n",
       "      <td>惊悚/恐怖/儿童</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>78</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>俄罗斯</th>\n",
       "      <th>1977</th>\n",
       "      <td>列宁格勒围困之星火战役 Блокада: Фильм 2: Ленинградский ме...</td>\n",
       "      <td>32</td>\n",
       "      <td>剧情/战争</td>\n",
       "      <td>1905-05-30 00:00:00</td>\n",
       "      <td>97</td>\n",
       "      <td>6.600000</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <th>2018</th>\n",
       "      <td>复仇者联盟3</td>\n",
       "      <td>123456</td>\n",
       "      <td>剧情/科幻</td>\n",
       "      <td>2018-05-04 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>6.935704</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>38163 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                          名字    投票人数  \\\n",
       "产地   年代                                                                \n",
       "美国   1994                                             肖申克的救赎  692795   \n",
       "     1957                                               控方证人   42995   \n",
       "意大利  1997                                              美丽人生   327855   \n",
       "美国   1994                                               阿甘正传  580897   \n",
       "中国大陆 1993                                               霸王别姬  478523   \n",
       "...                                                      ...     ...   \n",
       "美国   1935                                              1935年      57   \n",
       "中国大陆 1986                                               血溅画屏      95   \n",
       "     1986                                             魔窟中的幻想      51   \n",
       "俄罗斯  1977  列宁格勒围困之星火战役 Блокада: Фильм 2: Ленинградский ме...      32   \n",
       "美国   2018                                             复仇者联盟3  123456   \n",
       "\n",
       "                       类型                 上映时间   时长        评分    首映地点 评分等级  \\\n",
       "产地   年代                                                                      \n",
       "美国   1994           剧情/犯罪  1994-09-10 00:00:00  142  9.600000  多伦多电影节    A   \n",
       "     1957        剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.500000      美国    A   \n",
       "意大利  1997        剧情/喜剧/爱情  1997-12-20 00:00:00  116  9.500000     意大利    A   \n",
       "美国   1994           剧情/爱情  1994-06-23 00:00:00  142  9.400000   洛杉矶首映    A   \n",
       "中国大陆 1993        剧情/爱情/同性  1993-01-01 00:00:00  171  9.400000      香港    A   \n",
       "...                   ...                  ...  ...       ...     ...  ...   \n",
       "美国   1935           喜剧/歌舞  1935-03-15 00:00:00   98  7.600000      美国    B   \n",
       "中国大陆 1986  剧情/悬疑/犯罪/武侠/古装  1905-06-08 00:00:00   91  7.100000      美国    B   \n",
       "     1986        惊悚/恐怖/儿童  1905-06-08 00:00:00   78  8.000000      美国    B   \n",
       "俄罗斯  1977           剧情/战争  1905-05-30 00:00:00   97  6.600000      美国    C   \n",
       "美国   2018           剧情/科幻  2018-05-04 00:00:00  142  6.935704      美国    C   \n",
       "\n",
       "          热门程度  \n",
       "产地   年代         \n",
       "美国   1994    A  \n",
       "     1957    A  \n",
       "意大利  1997    A  \n",
       "美国   1994    A  \n",
       "中国大陆 1993    A  \n",
       "...        ...  \n",
       "美国   1935    E  \n",
       "中国大陆 1986    D  \n",
       "     1986    E  \n",
       "俄罗斯  1977    E  \n",
       "美国   2018    A  \n",
       "\n",
       "[38163 rows x 9 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df.set_index([\"产地\", \"年代\"])\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 每一个索引都是一个元组 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('美国', 1994)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.index[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 获取所有的美国电影，由于产地信息已经变成了索引，因此要是用.loc方法。 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>年代</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1994</th>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.600000</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1957</th>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1994</th>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2012</th>\n",
       "      <td>泰坦尼克号</td>\n",
       "      <td>157074</td>\n",
       "      <td>剧情/爱情/灾难</td>\n",
       "      <td>2012-04-10 00:00:00</td>\n",
       "      <td>194</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1993</th>\n",
       "      <td>辛德勒的名单</td>\n",
       "      <td>306904</td>\n",
       "      <td>剧情/历史/战争</td>\n",
       "      <td>1993-11-30 00:00:00</td>\n",
       "      <td>195</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>华盛顿首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1987</th>\n",
       "      <td>零下的激情</td>\n",
       "      <td>199</td>\n",
       "      <td>剧情/爱情/犯罪</td>\n",
       "      <td>1987-11-06 00:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>7.400000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1986</th>\n",
       "      <td>离别秋波</td>\n",
       "      <td>240</td>\n",
       "      <td>剧情/爱情/音乐</td>\n",
       "      <td>1986-02-19 00:00:00</td>\n",
       "      <td>90</td>\n",
       "      <td>8.200000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1986</th>\n",
       "      <td>极乐森林</td>\n",
       "      <td>45</td>\n",
       "      <td>纪录片</td>\n",
       "      <td>1986-09-14 00:00:00</td>\n",
       "      <td>90</td>\n",
       "      <td>8.100000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1935</th>\n",
       "      <td>1935年</td>\n",
       "      <td>57</td>\n",
       "      <td>喜剧/歌舞</td>\n",
       "      <td>1935-03-15 00:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>7.600000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018</th>\n",
       "      <td>复仇者联盟3</td>\n",
       "      <td>123456</td>\n",
       "      <td>剧情/科幻</td>\n",
       "      <td>2018-05-04 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>6.935704</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>11714 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          名字    投票人数        类型                 上映时间   时长        评分    首映地点  \\\n",
       "年代                                                                           \n",
       "1994  肖申克的救赎  692795     剧情/犯罪  1994-09-10 00:00:00  142  9.600000  多伦多电影节   \n",
       "1957    控方证人   42995  剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.500000      美国   \n",
       "1994    阿甘正传  580897     剧情/爱情  1994-06-23 00:00:00  142  9.400000   洛杉矶首映   \n",
       "2012  泰坦尼克号   157074  剧情/爱情/灾难  2012-04-10 00:00:00  194  9.400000    中国大陆   \n",
       "1993  辛德勒的名单  306904  剧情/历史/战争  1993-11-30 00:00:00  195  9.400000   华盛顿首映   \n",
       "...      ...     ...       ...                  ...  ...       ...     ...   \n",
       "1987   零下的激情     199  剧情/爱情/犯罪  1987-11-06 00:00:00   98  7.400000      美国   \n",
       "1986    离别秋波     240  剧情/爱情/音乐  1986-02-19 00:00:00   90  8.200000      美国   \n",
       "1986    极乐森林      45       纪录片  1986-09-14 00:00:00   90  8.100000      美国   \n",
       "1935   1935年      57     喜剧/歌舞  1935-03-15 00:00:00   98  7.600000      美国   \n",
       "2018  复仇者联盟3  123456     剧情/科幻  2018-05-04 00:00:00  142  6.935704      美国   \n",
       "\n",
       "     评分等级 热门程度  \n",
       "年代              \n",
       "1994    A    A  \n",
       "1957    A    A  \n",
       "1994    A    A  \n",
       "2012    A    A  \n",
       "1993    A    A  \n",
       "...   ...  ...  \n",
       "1987    B    D  \n",
       "1986    B    C  \n",
       "1986    B    E  \n",
       "1935    B    E  \n",
       "2018    C    A  \n",
       "\n",
       "[11714 rows x 9 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[\"美国\"] #行标签索引行数据，注意索引多行时两边都是闭区间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>年代</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1993</th>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.4</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1961</th>\n",
       "      <td>大闹天宫</td>\n",
       "      <td>74881</td>\n",
       "      <td>动画/奇幻</td>\n",
       "      <td>1905-05-14 00:00:00</td>\n",
       "      <td>114</td>\n",
       "      <td>9.2</td>\n",
       "      <td>上集</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015</th>\n",
       "      <td>穹顶之下</td>\n",
       "      <td>51113</td>\n",
       "      <td>纪录片</td>\n",
       "      <td>2015-02-28 00:00:00</td>\n",
       "      <td>104</td>\n",
       "      <td>9.2</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1982</th>\n",
       "      <td>茶馆</td>\n",
       "      <td>10678</td>\n",
       "      <td>剧情/历史</td>\n",
       "      <td>1905-06-04 00:00:00</td>\n",
       "      <td>118</td>\n",
       "      <td>9.2</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1988</th>\n",
       "      <td>山水情</td>\n",
       "      <td>10781</td>\n",
       "      <td>动画/短片</td>\n",
       "      <td>1905-06-10 00:00:00</td>\n",
       "      <td>19</td>\n",
       "      <td>9.2</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1986</th>\n",
       "      <td>T省的八四、八五</td>\n",
       "      <td>380</td>\n",
       "      <td>剧情</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>94</td>\n",
       "      <td>8.7</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1986</th>\n",
       "      <td>失踪的女中学生</td>\n",
       "      <td>101</td>\n",
       "      <td>儿童</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>102</td>\n",
       "      <td>7.4</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1986</th>\n",
       "      <td>血战台儿庄</td>\n",
       "      <td>2908</td>\n",
       "      <td>战争</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>120</td>\n",
       "      <td>8.1</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1986</th>\n",
       "      <td>血溅画屏</td>\n",
       "      <td>95</td>\n",
       "      <td>剧情/悬疑/犯罪/武侠/古装</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>91</td>\n",
       "      <td>7.1</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1986</th>\n",
       "      <td>魔窟中的幻想</td>\n",
       "      <td>51</td>\n",
       "      <td>惊悚/恐怖/儿童</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>78</td>\n",
       "      <td>8.0</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3791 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            名字    投票人数              类型                 上映时间   时长   评分  首映地点  \\\n",
       "年代                                                                            \n",
       "1993      霸王别姬  478523        剧情/爱情/同性  1993-01-01 00:00:00  171  9.4    香港   \n",
       "1961      大闹天宫   74881           动画/奇幻  1905-05-14 00:00:00  114  9.2    上集   \n",
       "2015      穹顶之下   51113             纪录片  2015-02-28 00:00:00  104  9.2  中国大陆   \n",
       "1982        茶馆   10678           剧情/历史  1905-06-04 00:00:00  118  9.2    美国   \n",
       "1988       山水情   10781           动画/短片  1905-06-10 00:00:00   19  9.2    美国   \n",
       "...        ...     ...             ...                  ...  ...  ...   ...   \n",
       "1986  T省的八四、八五     380              剧情  1905-06-08 00:00:00   94  8.7    美国   \n",
       "1986   失踪的女中学生     101              儿童  1905-06-08 00:00:00  102  7.4    美国   \n",
       "1986     血战台儿庄    2908              战争  1905-06-08 00:00:00  120  8.1    美国   \n",
       "1986      血溅画屏      95  剧情/悬疑/犯罪/武侠/古装  1905-06-08 00:00:00   91  7.1    美国   \n",
       "1986    魔窟中的幻想      51        惊悚/恐怖/儿童  1905-06-08 00:00:00   78  8.0    美国   \n",
       "\n",
       "     评分等级 热门程度  \n",
       "年代              \n",
       "1993    A    A  \n",
       "1961    A    A  \n",
       "2015    A    A  \n",
       "1982    A    A  \n",
       "1988    A    A  \n",
       "...   ...  ...  \n",
       "1986    B    C  \n",
       "1986    B    D  \n",
       "1986    B    A  \n",
       "1986    B    D  \n",
       "1986    B    E  \n",
       "\n",
       "[3791 rows x 9 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[\"中国大陆\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 这样做的最大好处是我们可以简化很多的筛选环节"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 每一个索引是一个元组 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1994</th>\n",
       "      <th>美国</th>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.600000</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1957</th>\n",
       "      <th>美国</th>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1997</th>\n",
       "      <th>意大利</th>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1994</th>\n",
       "      <th>美国</th>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1993</th>\n",
       "      <th>中国大陆</th>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1935</th>\n",
       "      <th>美国</th>\n",
       "      <td>1935年</td>\n",
       "      <td>57</td>\n",
       "      <td>喜剧/歌舞</td>\n",
       "      <td>1935-03-15 00:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>7.600000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">1986</th>\n",
       "      <th>中国大陆</th>\n",
       "      <td>血溅画屏</td>\n",
       "      <td>95</td>\n",
       "      <td>剧情/悬疑/犯罪/武侠/古装</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>91</td>\n",
       "      <td>7.100000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国大陆</th>\n",
       "      <td>魔窟中的幻想</td>\n",
       "      <td>51</td>\n",
       "      <td>惊悚/恐怖/儿童</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>78</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1977</th>\n",
       "      <th>俄罗斯</th>\n",
       "      <td>列宁格勒围困之星火战役 Блокада: Фильм 2: Ленинградский ме...</td>\n",
       "      <td>32</td>\n",
       "      <td>剧情/战争</td>\n",
       "      <td>1905-05-30 00:00:00</td>\n",
       "      <td>97</td>\n",
       "      <td>6.600000</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018</th>\n",
       "      <th>美国</th>\n",
       "      <td>复仇者联盟3</td>\n",
       "      <td>123456</td>\n",
       "      <td>剧情/科幻</td>\n",
       "      <td>2018-05-04 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>6.935704</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>38163 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                          名字    投票人数  \\\n",
       "年代   产地                                                                \n",
       "1994 美国                                               肖申克的救赎  692795   \n",
       "1957 美国                                                 控方证人   42995   \n",
       "1997 意大利                                               美丽人生   327855   \n",
       "1994 美国                                                 阿甘正传  580897   \n",
       "1993 中国大陆                                               霸王别姬  478523   \n",
       "...                                                      ...     ...   \n",
       "1935 美国                                                1935年      57   \n",
       "1986 中国大陆                                               血溅画屏      95   \n",
       "     中国大陆                                             魔窟中的幻想      51   \n",
       "1977 俄罗斯   列宁格勒围困之星火战役 Блокада: Фильм 2: Ленинградский ме...      32   \n",
       "2018 美国                                               复仇者联盟3  123456   \n",
       "\n",
       "                       类型                 上映时间   时长        评分    首映地点 评分等级  \\\n",
       "年代   产地                                                                      \n",
       "1994 美国             剧情/犯罪  1994-09-10 00:00:00  142  9.600000  多伦多电影节    A   \n",
       "1957 美国          剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.500000      美国    A   \n",
       "1997 意大利         剧情/喜剧/爱情  1997-12-20 00:00:00  116  9.500000     意大利    A   \n",
       "1994 美国             剧情/爱情  1994-06-23 00:00:00  142  9.400000   洛杉矶首映    A   \n",
       "1993 中国大陆        剧情/爱情/同性  1993-01-01 00:00:00  171  9.400000      香港    A   \n",
       "...                   ...                  ...  ...       ...     ...  ...   \n",
       "1935 美国             喜剧/歌舞  1935-03-15 00:00:00   98  7.600000      美国    B   \n",
       "1986 中国大陆  剧情/悬疑/犯罪/武侠/古装  1905-06-08 00:00:00   91  7.100000      美国    B   \n",
       "     中国大陆        惊悚/恐怖/儿童  1905-06-08 00:00:00   78  8.000000      美国    B   \n",
       "1977 俄罗斯            剧情/战争  1905-05-30 00:00:00   97  6.600000      美国    C   \n",
       "2018 美国             剧情/科幻  2018-05-04 00:00:00  142  6.935704      美国    C   \n",
       "\n",
       "          热门程度  \n",
       "年代   产地         \n",
       "1994 美国      A  \n",
       "1957 美国      A  \n",
       "1997 意大利     A  \n",
       "1994 美国      A  \n",
       "1993 中国大陆    A  \n",
       "...        ...  \n",
       "1935 美国      E  \n",
       "1986 中国大陆    D  \n",
       "     中国大陆    E  \n",
       "1977 俄罗斯     E  \n",
       "2018 美国      A  \n",
       "\n",
       "[38163 rows x 9 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df.swaplevel(\"产地\", \"年代\") #调换标签顺序\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>产地</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.6</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.4</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>法国</th>\n",
       "      <td>这个杀手不太冷</td>\n",
       "      <td>662552</td>\n",
       "      <td>剧情/动作/犯罪</td>\n",
       "      <td>1994-09-14 00:00:00</td>\n",
       "      <td>133</td>\n",
       "      <td>9.4</td>\n",
       "      <td>法国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <td>34街的</td>\n",
       "      <td>768</td>\n",
       "      <td>剧情/家庭/奇幻</td>\n",
       "      <td>1994-12-23 00:00:00</td>\n",
       "      <td>114</td>\n",
       "      <td>7.9</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国大陆</th>\n",
       "      <td>活着</td>\n",
       "      <td>202794</td>\n",
       "      <td>剧情/家庭</td>\n",
       "      <td>1994-05-18 00:00:00</td>\n",
       "      <td>132</td>\n",
       "      <td>9.0</td>\n",
       "      <td>法国</td>\n",
       "      <td>B</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <td>鬼精灵2： 恐怖</td>\n",
       "      <td>60</td>\n",
       "      <td>喜剧/恐怖/奇幻</td>\n",
       "      <td>1994-04-08 00:00:00</td>\n",
       "      <td>85</td>\n",
       "      <td>5.8</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>英国</th>\n",
       "      <td>黑色第16</td>\n",
       "      <td>44</td>\n",
       "      <td>剧情/惊悚</td>\n",
       "      <td>1996-02-01 00:00:00</td>\n",
       "      <td>106</td>\n",
       "      <td>6.8</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>日本</th>\n",
       "      <td>蜡笔小新之布里布里王国的秘密宝藏 クレヨンしんちゃん ブリブリ王国の</td>\n",
       "      <td>2142</td>\n",
       "      <td>动画</td>\n",
       "      <td>1994-04-23 00:00:00</td>\n",
       "      <td>94</td>\n",
       "      <td>7.7</td>\n",
       "      <td>日本</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>日本</th>\n",
       "      <td>龙珠Z剧场版10：两人面临危机! 超战士难以成眠 ドラゴンボール Z 劇場版：危険なふたり！</td>\n",
       "      <td>579</td>\n",
       "      <td>动画</td>\n",
       "      <td>1994-03-12 00:00:00</td>\n",
       "      <td>53</td>\n",
       "      <td>7.2</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国香港</th>\n",
       "      <td>重案实录之惊天械劫案 重案實錄之驚天械劫</td>\n",
       "      <td>90</td>\n",
       "      <td>动作/犯罪</td>\n",
       "      <td>1905-06-16 00:00:00</td>\n",
       "      <td>114</td>\n",
       "      <td>7.3</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>489 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  名字    投票人数        类型  \\\n",
       "产地                                                                       \n",
       "美国                                            肖申克的救赎  692795     剧情/犯罪   \n",
       "美国                                              阿甘正传  580897     剧情/爱情   \n",
       "法国                                          这个杀手不太冷   662552  剧情/动作/犯罪   \n",
       "美国                                              34街的     768  剧情/家庭/奇幻   \n",
       "中国大陆                                              活着  202794     剧情/家庭   \n",
       "...                                              ...     ...       ...   \n",
       "美国                                          鬼精灵2： 恐怖      60  喜剧/恐怖/奇幻   \n",
       "英国                                             黑色第16      44     剧情/惊悚   \n",
       "日本                蜡笔小新之布里布里王国的秘密宝藏 クレヨンしんちゃん ブリブリ王国の    2142        动画   \n",
       "日本    龙珠Z剧场版10：两人面临危机! 超战士难以成眠 ドラゴンボール Z 劇場版：危険なふたり！     579        动画   \n",
       "中国香港                            重案实录之惊天械劫案 重案實錄之驚天械劫      90     动作/犯罪   \n",
       "\n",
       "                     上映时间   时长   评分    首映地点 评分等级 热门程度  \n",
       "产地                                                     \n",
       "美国    1994-09-10 00:00:00  142  9.6  多伦多电影节    A    A  \n",
       "美国    1994-06-23 00:00:00  142  9.4   洛杉矶首映    A    A  \n",
       "法国    1994-09-14 00:00:00  133  9.4      法国    A    A  \n",
       "美国    1994-12-23 00:00:00  114  7.9      美国    B    B  \n",
       "中国大陆  1994-05-18 00:00:00  132  9.0      法国    B    A  \n",
       "...                   ...  ...  ...     ...  ...  ...  \n",
       "美国    1994-04-08 00:00:00   85  5.8      美国    C    E  \n",
       "英国    1996-02-01 00:00:00  106  6.8      美国    C    E  \n",
       "日本    1994-04-23 00:00:00   94  7.7      日本    B    B  \n",
       "日本    1994-03-12 00:00:00   53  7.2      美国    B    C  \n",
       "中国香港  1905-06-16 00:00:00  114  7.3      美国    B    D  \n",
       "\n",
       "[489 rows x 9 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[1994]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 取消层次化索引 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.6</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1957</td>\n",
       "      <td>美国</td>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1997</td>\n",
       "      <td>意大利</td>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.4</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.4</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     年代    产地      名字    投票人数        类型                 上映时间   时长   评分  \\\n",
       "0  1994    美国  肖申克的救赎  692795     剧情/犯罪  1994-09-10 00:00:00  142  9.6   \n",
       "1  1957    美国    控方证人   42995  剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.5   \n",
       "2  1997   意大利   美丽人生   327855  剧情/喜剧/爱情  1997-12-20 00:00:00  116  9.5   \n",
       "3  1994    美国    阿甘正传  580897     剧情/爱情  1994-06-23 00:00:00  142  9.4   \n",
       "4  1993  中国大陆    霸王别姬  478523  剧情/爱情/同性  1993-01-01 00:00:00  171  9.4   \n",
       "\n",
       "     首映地点 评分等级 热门程度  \n",
       "0  多伦多电影节    A    A  \n",
       "1      美国    A    A  \n",
       "2     意大利    A    A  \n",
       "3   洛杉矶首映    A    A  \n",
       "4      香港    A    A  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df.reset_index()\n",
    "df[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### (2)数据旋转 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "行列转换：以前5部电影为例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.6</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1957</td>\n",
       "      <td>美国</td>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1997</td>\n",
       "      <td>意大利</td>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.4</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.4</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     年代    产地      名字    投票人数        类型                 上映时间   时长   评分  \\\n",
       "0  1994    美国  肖申克的救赎  692795     剧情/犯罪  1994-09-10 00:00:00  142  9.6   \n",
       "1  1957    美国    控方证人   42995  剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.5   \n",
       "2  1997   意大利   美丽人生   327855  剧情/喜剧/爱情  1997-12-20 00:00:00  116  9.5   \n",
       "3  1994    美国    阿甘正传  580897     剧情/爱情  1994-06-23 00:00:00  142  9.4   \n",
       "4  1993  中国大陆    霸王别姬  478523  剧情/爱情/同性  1993-01-01 00:00:00  171  9.4   \n",
       "\n",
       "     首映地点 评分等级 热门程度  \n",
       "0  多伦多电影节    A    A  \n",
       "1      美国    A    A  \n",
       "2     意大利    A    A  \n",
       "3   洛杉矶首映    A    A  \n",
       "4      香港    A    A  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = df[:5]\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`.T` 可以直接让数据的行列互换（转置）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>年代</th>\n",
       "      <td>1994</td>\n",
       "      <td>1957</td>\n",
       "      <td>1997</td>\n",
       "      <td>1994</td>\n",
       "      <td>1993</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>产地</th>\n",
       "      <td>美国</td>\n",
       "      <td>美国</td>\n",
       "      <td>意大利</td>\n",
       "      <td>美国</td>\n",
       "      <td>中国大陆</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>名字</th>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>控方证人</td>\n",
       "      <td>美丽人生</td>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>霸王别姬</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>投票人数</th>\n",
       "      <td>692795</td>\n",
       "      <td>42995</td>\n",
       "      <td>327855</td>\n",
       "      <td>580897</td>\n",
       "      <td>478523</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>类型</th>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上映时间</th>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>时长</th>\n",
       "      <td>142</td>\n",
       "      <td>116</td>\n",
       "      <td>116</td>\n",
       "      <td>142</td>\n",
       "      <td>171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>评分</th>\n",
       "      <td>9.6</td>\n",
       "      <td>9.5</td>\n",
       "      <td>9.5</td>\n",
       "      <td>9.4</td>\n",
       "      <td>9.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>首映地点</th>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>美国</td>\n",
       "      <td>意大利</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>香港</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>评分等级</th>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>热门程度</th>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        0                    1                    2  \\\n",
       "年代                   1994                 1957                 1997   \n",
       "产地                     美国                   美国                  意大利   \n",
       "名字                 肖申克的救赎                 控方证人                美丽人生    \n",
       "投票人数               692795                42995               327855   \n",
       "类型                  剧情/犯罪             剧情/悬疑/犯罪             剧情/喜剧/爱情   \n",
       "上映时间  1994-09-10 00:00:00  1957-12-17 00:00:00  1997-12-20 00:00:00   \n",
       "时长                    142                  116                  116   \n",
       "评分                    9.6                  9.5                  9.5   \n",
       "首映地点               多伦多电影节                   美国                  意大利   \n",
       "评分等级                    A                    A                    A   \n",
       "热门程度                    A                    A                    A   \n",
       "\n",
       "                        3                    4  \n",
       "年代                   1994                 1993  \n",
       "产地                     美国                 中国大陆  \n",
       "名字                   阿甘正传                 霸王别姬  \n",
       "投票人数               580897               478523  \n",
       "类型                  剧情/爱情             剧情/爱情/同性  \n",
       "上映时间  1994-06-23 00:00:00  1993-01-01 00:00:00  \n",
       "时长                    142                  171  \n",
       "评分                    9.4                  9.4  \n",
       "首映地点                洛杉矶首映                   香港  \n",
       "评分等级                    A                    A  \n",
       "热门程度                    A                    A  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### DataFrame可以使用stack转化为层次化索引的Series，再用unstack还原为DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0  年代                     1994\n",
       "   产地                       美国\n",
       "   名字                   肖申克的救赎\n",
       "   投票人数                 692795\n",
       "   类型                    剧情/犯罪\n",
       "   上映时间    1994-09-10 00:00:00\n",
       "   时长                      142\n",
       "   评分                      9.6\n",
       "   首映地点                 多伦多电影节\n",
       "   评分等级                      A\n",
       "   热门程度                      A\n",
       "1  年代                     1957\n",
       "   产地                       美国\n",
       "   名字                     控方证人\n",
       "   投票人数                  42995\n",
       "   类型                 剧情/悬疑/犯罪\n",
       "   上映时间    1957-12-17 00:00:00\n",
       "   时长                      116\n",
       "   评分                      9.5\n",
       "   首映地点                     美国\n",
       "   评分等级                      A\n",
       "   热门程度                      A\n",
       "2  年代                     1997\n",
       "   产地                      意大利\n",
       "   名字                    美丽人生 \n",
       "   投票人数                 327855\n",
       "   类型                 剧情/喜剧/爱情\n",
       "   上映时间    1997-12-20 00:00:00\n",
       "   时长                      116\n",
       "   评分                      9.5\n",
       "   首映地点                    意大利\n",
       "   评分等级                      A\n",
       "   热门程度                      A\n",
       "3  年代                     1994\n",
       "   产地                       美国\n",
       "   名字                     阿甘正传\n",
       "   投票人数                 580897\n",
       "   类型                    剧情/爱情\n",
       "   上映时间    1994-06-23 00:00:00\n",
       "   时长                      142\n",
       "   评分                      9.4\n",
       "   首映地点                  洛杉矶首映\n",
       "   评分等级                      A\n",
       "   热门程度                      A\n",
       "4  年代                     1993\n",
       "   产地                     中国大陆\n",
       "   名字                     霸王别姬\n",
       "   投票人数                 478523\n",
       "   类型                 剧情/爱情/同性\n",
       "   上映时间    1993-01-01 00:00:00\n",
       "   时长                      171\n",
       "   评分                      9.4\n",
       "   首映地点                     香港\n",
       "   评分等级                      A\n",
       "   热门程度                      A\n",
       "dtype: object"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.stack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10</td>\n",
       "      <td>142</td>\n",
       "      <td>9.6</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1957</td>\n",
       "      <td>美国</td>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1997</td>\n",
       "      <td>意大利</td>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23</td>\n",
       "      <td>142</td>\n",
       "      <td>9.4</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01</td>\n",
       "      <td>171</td>\n",
       "      <td>9.4</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     年代    产地      名字    投票人数        类型       上映时间   时长   评分    首映地点 评分等级 热门程度\n",
       "0  1994    美国  肖申克的救赎  692795     剧情/犯罪 1994-09-10  142  9.6  多伦多电影节    A    A\n",
       "1  1957    美国    控方证人   42995  剧情/悬疑/犯罪 1957-12-17  116  9.5      美国    A    A\n",
       "2  1997   意大利   美丽人生   327855  剧情/喜剧/爱情 1997-12-20  116  9.5     意大利    A    A\n",
       "3  1994    美国    阿甘正传  580897     剧情/爱情 1994-06-23  142  9.4   洛杉矶首映    A    A\n",
       "4  1993  中国大陆    霸王别姬  478523  剧情/爱情/同性 1993-01-01  171  9.4      香港    A    A"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.stack().unstack()  #转回来"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.2 数据分组，分组运算"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GroupBy技术：实现数据的分组和分组运算，作用类似于数据透视表\n",
    "![test2](45f3e7f7e15bdc7e5ef4d01267073a4.jpg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 按照电影的产地进行分组 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "group = df.groupby(df[\"产地\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 查看上一步定义的分组变量group的类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.groupby.generic.DataFrameGroupBy"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(group)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 可以计算分组后各组的统计量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>产地</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>中国台湾</th>\n",
       "      <td>1999.009709</td>\n",
       "      <td>8474.864078</td>\n",
       "      <td>87.257282</td>\n",
       "      <td>7.066667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国大陆</th>\n",
       "      <td>2004.582432</td>\n",
       "      <td>10915.587708</td>\n",
       "      <td>81.517014</td>\n",
       "      <td>6.062991</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国香港</th>\n",
       "      <td>1991.088865</td>\n",
       "      <td>8141.709870</td>\n",
       "      <td>88.553214</td>\n",
       "      <td>6.473551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>丹麦</th>\n",
       "      <td>1999.091371</td>\n",
       "      <td>2003.781726</td>\n",
       "      <td>88.507614</td>\n",
       "      <td>7.246701</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>俄罗斯</th>\n",
       "      <td>1984.892857</td>\n",
       "      <td>1021.180672</td>\n",
       "      <td>96.100840</td>\n",
       "      <td>7.557143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>其他</th>\n",
       "      <td>1998.721721</td>\n",
       "      <td>1619.144450</td>\n",
       "      <td>87.656399</td>\n",
       "      <td>7.226713</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>加拿大</th>\n",
       "      <td>2002.520451</td>\n",
       "      <td>1921.834979</td>\n",
       "      <td>80.592384</td>\n",
       "      <td>6.727221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>印度</th>\n",
       "      <td>2006.039326</td>\n",
       "      <td>3219.587079</td>\n",
       "      <td>120.949438</td>\n",
       "      <td>6.864888</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>墨西哥</th>\n",
       "      <td>1992.786325</td>\n",
       "      <td>1191.982906</td>\n",
       "      <td>92.641026</td>\n",
       "      <td>7.085470</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>巴西</th>\n",
       "      <td>1999.888889</td>\n",
       "      <td>3606.333333</td>\n",
       "      <td>88.373737</td>\n",
       "      <td>7.232323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>德国</th>\n",
       "      <td>1996.053869</td>\n",
       "      <td>2624.736533</td>\n",
       "      <td>92.258570</td>\n",
       "      <td>7.187365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>意大利</th>\n",
       "      <td>1985.599190</td>\n",
       "      <td>3374.955466</td>\n",
       "      <td>104.333333</td>\n",
       "      <td>7.183131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>日本</th>\n",
       "      <td>1999.886536</td>\n",
       "      <td>3592.015781</td>\n",
       "      <td>85.010587</td>\n",
       "      <td>7.192569</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>比利时</th>\n",
       "      <td>1999.503650</td>\n",
       "      <td>1244.153285</td>\n",
       "      <td>83.065693</td>\n",
       "      <td>7.197080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>法国</th>\n",
       "      <td>1991.794044</td>\n",
       "      <td>3663.066380</td>\n",
       "      <td>90.249013</td>\n",
       "      <td>7.243093</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>波兰</th>\n",
       "      <td>1987.027624</td>\n",
       "      <td>881.640884</td>\n",
       "      <td>80.734807</td>\n",
       "      <td>7.441989</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>泰国</th>\n",
       "      <td>2009.129252</td>\n",
       "      <td>5322.724490</td>\n",
       "      <td>88.442177</td>\n",
       "      <td>6.109184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>澳大利亚</th>\n",
       "      <td>2002.966102</td>\n",
       "      <td>4798.111864</td>\n",
       "      <td>85.593220</td>\n",
       "      <td>6.953559</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>瑞典</th>\n",
       "      <td>1987.106952</td>\n",
       "      <td>1549.700535</td>\n",
       "      <td>94.625668</td>\n",
       "      <td>7.425668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <td>1994.519891</td>\n",
       "      <td>8677.294861</td>\n",
       "      <td>89.976097</td>\n",
       "      <td>6.923351</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>英国</th>\n",
       "      <td>1996.630926</td>\n",
       "      <td>4979.837848</td>\n",
       "      <td>89.213318</td>\n",
       "      <td>7.498420</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>荷兰</th>\n",
       "      <td>2001.198675</td>\n",
       "      <td>957.589404</td>\n",
       "      <td>75.887417</td>\n",
       "      <td>7.160265</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>西班牙</th>\n",
       "      <td>2001.546275</td>\n",
       "      <td>3355.266366</td>\n",
       "      <td>90.905192</td>\n",
       "      <td>7.025056</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阿根廷</th>\n",
       "      <td>2004.212389</td>\n",
       "      <td>2283.938053</td>\n",
       "      <td>92.548673</td>\n",
       "      <td>7.248673</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>韩国</th>\n",
       "      <td>2008.100596</td>\n",
       "      <td>6527.518629</td>\n",
       "      <td>100.018629</td>\n",
       "      <td>6.351118</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               年代          投票人数          时长        评分\n",
       "产地                                                   \n",
       "中国台湾  1999.009709   8474.864078   87.257282  7.066667\n",
       "中国大陆  2004.582432  10915.587708   81.517014  6.062991\n",
       "中国香港  1991.088865   8141.709870   88.553214  6.473551\n",
       "丹麦    1999.091371   2003.781726   88.507614  7.246701\n",
       "俄罗斯   1984.892857   1021.180672   96.100840  7.557143\n",
       "其他    1998.721721   1619.144450   87.656399  7.226713\n",
       "加拿大   2002.520451   1921.834979   80.592384  6.727221\n",
       "印度    2006.039326   3219.587079  120.949438  6.864888\n",
       "墨西哥   1992.786325   1191.982906   92.641026  7.085470\n",
       "巴西    1999.888889   3606.333333   88.373737  7.232323\n",
       "德国    1996.053869   2624.736533   92.258570  7.187365\n",
       "意大利   1985.599190   3374.955466  104.333333  7.183131\n",
       "日本    1999.886536   3592.015781   85.010587  7.192569\n",
       "比利时   1999.503650   1244.153285   83.065693  7.197080\n",
       "法国    1991.794044   3663.066380   90.249013  7.243093\n",
       "波兰    1987.027624    881.640884   80.734807  7.441989\n",
       "泰国    2009.129252   5322.724490   88.442177  6.109184\n",
       "澳大利亚  2002.966102   4798.111864   85.593220  6.953559\n",
       "瑞典    1987.106952   1549.700535   94.625668  7.425668\n",
       "美国    1994.519891   8677.294861   89.976097  6.923351\n",
       "英国    1996.630926   4979.837848   89.213318  7.498420\n",
       "荷兰    2001.198675    957.589404   75.887417  7.160265\n",
       "西班牙   2001.546275   3355.266366   90.905192  7.025056\n",
       "阿根廷   2004.212389   2283.938053   92.548673  7.248673\n",
       "韩国    2008.100596   6527.518629  100.018629  6.351118"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group.mean() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>产地</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>中国台湾</th>\n",
       "      <td>1235388</td>\n",
       "      <td>5237466</td>\n",
       "      <td>53925</td>\n",
       "      <td>4367.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国大陆</th>\n",
       "      <td>7599372</td>\n",
       "      <td>41380993</td>\n",
       "      <td>309031</td>\n",
       "      <td>22984.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国香港</th>\n",
       "      <td>5668630</td>\n",
       "      <td>23179448</td>\n",
       "      <td>252111</td>\n",
       "      <td>18430.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>丹麦</th>\n",
       "      <td>393821</td>\n",
       "      <td>394745</td>\n",
       "      <td>17436</td>\n",
       "      <td>1427.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>俄罗斯</th>\n",
       "      <td>944809</td>\n",
       "      <td>486082</td>\n",
       "      <td>45744</td>\n",
       "      <td>3597.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>其他</th>\n",
       "      <td>3763593</td>\n",
       "      <td>3048849</td>\n",
       "      <td>165057</td>\n",
       "      <td>13607.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>加拿大</th>\n",
       "      <td>1419787</td>\n",
       "      <td>1362581</td>\n",
       "      <td>57140</td>\n",
       "      <td>4769.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>印度</th>\n",
       "      <td>714150</td>\n",
       "      <td>1146173</td>\n",
       "      <td>43058</td>\n",
       "      <td>2443.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>墨西哥</th>\n",
       "      <td>233156</td>\n",
       "      <td>139462</td>\n",
       "      <td>10839</td>\n",
       "      <td>829.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>巴西</th>\n",
       "      <td>197989</td>\n",
       "      <td>357027</td>\n",
       "      <td>8749</td>\n",
       "      <td>716.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>德国</th>\n",
       "      <td>2037971</td>\n",
       "      <td>2679856</td>\n",
       "      <td>94196</td>\n",
       "      <td>7338.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>意大利</th>\n",
       "      <td>1471329</td>\n",
       "      <td>2500842</td>\n",
       "      <td>77311</td>\n",
       "      <td>5322.700000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>日本</th>\n",
       "      <td>10011432</td>\n",
       "      <td>17981631</td>\n",
       "      <td>425563</td>\n",
       "      <td>36006.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>比利时</th>\n",
       "      <td>273932</td>\n",
       "      <td>170449</td>\n",
       "      <td>11380</td>\n",
       "      <td>986.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>法国</th>\n",
       "      <td>5551130</td>\n",
       "      <td>10208966</td>\n",
       "      <td>251524</td>\n",
       "      <td>20186.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>波兰</th>\n",
       "      <td>359652</td>\n",
       "      <td>159577</td>\n",
       "      <td>14613</td>\n",
       "      <td>1347.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>泰国</th>\n",
       "      <td>590684</td>\n",
       "      <td>1564881</td>\n",
       "      <td>26002</td>\n",
       "      <td>1796.100000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>澳大利亚</th>\n",
       "      <td>590875</td>\n",
       "      <td>1415443</td>\n",
       "      <td>25250</td>\n",
       "      <td>2051.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>瑞典</th>\n",
       "      <td>371589</td>\n",
       "      <td>289794</td>\n",
       "      <td>17695</td>\n",
       "      <td>1388.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <td>23363806</td>\n",
       "      <td>101645832</td>\n",
       "      <td>1053980</td>\n",
       "      <td>81100.135704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>英国</th>\n",
       "      <td>5307045</td>\n",
       "      <td>13236409</td>\n",
       "      <td>237129</td>\n",
       "      <td>19930.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>荷兰</th>\n",
       "      <td>302181</td>\n",
       "      <td>144596</td>\n",
       "      <td>11459</td>\n",
       "      <td>1081.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>西班牙</th>\n",
       "      <td>886685</td>\n",
       "      <td>1486383</td>\n",
       "      <td>40271</td>\n",
       "      <td>3112.100000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阿根廷</th>\n",
       "      <td>226476</td>\n",
       "      <td>258085</td>\n",
       "      <td>10458</td>\n",
       "      <td>819.100000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>韩国</th>\n",
       "      <td>2694871</td>\n",
       "      <td>8759930</td>\n",
       "      <td>134225</td>\n",
       "      <td>8523.200000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            年代       投票人数       时长            评分\n",
       "产地                                              \n",
       "中国台湾   1235388    5237466    53925   4367.200000\n",
       "中国大陆   7599372   41380993   309031  22984.800000\n",
       "中国香港   5668630   23179448   252111  18430.200000\n",
       "丹麦      393821     394745    17436   1427.600000\n",
       "俄罗斯     944809     486082    45744   3597.200000\n",
       "其他     3763593    3048849   165057  13607.900000\n",
       "加拿大    1419787    1362581    57140   4769.600000\n",
       "印度      714150    1146173    43058   2443.900000\n",
       "墨西哥     233156     139462    10839    829.000000\n",
       "巴西      197989     357027     8749    716.000000\n",
       "德国     2037971    2679856    94196   7338.300000\n",
       "意大利    1471329    2500842    77311   5322.700000\n",
       "日本    10011432   17981631   425563  36006.000000\n",
       "比利时     273932     170449    11380    986.000000\n",
       "法国     5551130   10208966   251524  20186.500000\n",
       "波兰      359652     159577    14613   1347.000000\n",
       "泰国      590684    1564881    26002   1796.100000\n",
       "澳大利亚    590875    1415443    25250   2051.300000\n",
       "瑞典      371589     289794    17695   1388.600000\n",
       "美国    23363806  101645832  1053980  81100.135704\n",
       "英国     5307045   13236409   237129  19930.800000\n",
       "荷兰      302181     144596    11459   1081.200000\n",
       "西班牙     886685    1486383    40271   3112.100000\n",
       "阿根廷     226476     258085    10458    819.100000\n",
       "韩国     2694871    8759930   134225   8523.200000"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group.sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 计算每年的平均评分 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "年代\n",
       "1888    7.950000\n",
       "1890    4.800000\n",
       "1892    7.500000\n",
       "1894    6.633333\n",
       "1895    7.575000\n",
       "          ...   \n",
       "2013    6.375974\n",
       "2014    6.249384\n",
       "2015    6.121925\n",
       "2016    5.834524\n",
       "2018    6.935704\n",
       "Name: 评分, Length: 127, dtype: float64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"评分\"].groupby(df[\"年代\"]).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 分组运算只针对数值变量（pandas 2.0 起不再自动剔除非数值列，需显式传入 numeric_only=True）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>投票人数</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>产地</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>中国台湾</th>\n",
       "      <td>487.0</td>\n",
       "      <td>92.0</td>\n",
       "      <td>7.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国大陆</th>\n",
       "      <td>502.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>6.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国香港</th>\n",
       "      <td>637.0</td>\n",
       "      <td>92.0</td>\n",
       "      <td>6.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>丹麦</th>\n",
       "      <td>182.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>7.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>俄罗斯</th>\n",
       "      <td>132.5</td>\n",
       "      <td>93.0</td>\n",
       "      <td>7.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>其他</th>\n",
       "      <td>158.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>7.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>加拿大</th>\n",
       "      <td>258.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>6.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>印度</th>\n",
       "      <td>139.0</td>\n",
       "      <td>131.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>墨西哥</th>\n",
       "      <td>183.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>7.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>巴西</th>\n",
       "      <td>131.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>7.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>德国</th>\n",
       "      <td>212.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>7.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>意大利</th>\n",
       "      <td>187.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>7.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>日本</th>\n",
       "      <td>359.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>7.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>比利时</th>\n",
       "      <td>226.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>7.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>法国</th>\n",
       "      <td>244.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>7.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>波兰</th>\n",
       "      <td>174.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>7.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>泰国</th>\n",
       "      <td>542.5</td>\n",
       "      <td>92.5</td>\n",
       "      <td>6.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>澳大利亚</th>\n",
       "      <td>323.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>瑞典</th>\n",
       "      <td>191.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>7.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美国</th>\n",
       "      <td>415.0</td>\n",
       "      <td>93.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>英国</th>\n",
       "      <td>345.0</td>\n",
       "      <td>92.0</td>\n",
       "      <td>7.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>荷兰</th>\n",
       "      <td>180.0</td>\n",
       "      <td>85.0</td>\n",
       "      <td>7.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>西班牙</th>\n",
       "      <td>267.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>7.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阿根廷</th>\n",
       "      <td>146.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>7.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>韩国</th>\n",
       "      <td>1007.0</td>\n",
       "      <td>104.0</td>\n",
       "      <td>6.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        投票人数     时长   评分\n",
       "产地                      \n",
       "中国台湾   487.0   92.0  7.1\n",
       "中国大陆   502.0   90.0  6.4\n",
       "中国香港   637.0   92.0  6.5\n",
       "丹麦     182.0   94.0  7.3\n",
       "俄罗斯    132.5   93.0  7.7\n",
       "其他     158.0   90.0  7.4\n",
       "加拿大    258.0   89.0  6.8\n",
       "印度     139.0  131.0  7.0\n",
       "墨西哥    183.0   94.0  7.2\n",
       "巴西     131.0   96.0  7.3\n",
       "德国     212.0   94.0  7.3\n",
       "意大利    187.0  101.0  7.3\n",
       "日本     359.0   89.0  7.3\n",
       "比利时    226.0   90.0  7.3\n",
       "法国     244.0   95.0  7.3\n",
       "波兰     174.0   87.0  7.5\n",
       "泰国     542.5   92.5  6.2\n",
       "澳大利亚   323.0   95.0  7.0\n",
       "瑞典     191.0   96.0  7.6\n",
       "美国     415.0   93.0  7.0\n",
       "英国     345.0   92.0  7.6\n",
       "荷兰     180.0   85.0  7.3\n",
       "西班牙    267.0   97.0  7.1\n",
       "阿根廷    146.0   97.0  7.3\n",
       "韩国    1007.0  104.0  6.5"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"年代\"] = df[\"年代\"].astype(\"str\")\n",
    "df.groupby(df[\"产地\"]).median() #不会再对年代进行求取"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 我们也可以传入多个分组变量 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>投票人数</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>产地</th>\n",
       "      <th>年代</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">中国台湾</th>\n",
       "      <th>1963</th>\n",
       "      <td>121.000000</td>\n",
       "      <td>113.000000</td>\n",
       "      <td>6.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1965</th>\n",
       "      <td>153.666667</td>\n",
       "      <td>105.000000</td>\n",
       "      <td>6.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1966</th>\n",
       "      <td>51.000000</td>\n",
       "      <td>60.000000</td>\n",
       "      <td>7.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1967</th>\n",
       "      <td>4444.000000</td>\n",
       "      <td>112.000000</td>\n",
       "      <td>8.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1968</th>\n",
       "      <td>89.000000</td>\n",
       "      <td>83.000000</td>\n",
       "      <td>7.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">韩国</th>\n",
       "      <th>2012</th>\n",
       "      <td>5812.542857</td>\n",
       "      <td>100.771429</td>\n",
       "      <td>6.035238</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013</th>\n",
       "      <td>10470.370370</td>\n",
       "      <td>97.731481</td>\n",
       "      <td>6.062037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014</th>\n",
       "      <td>3776.266667</td>\n",
       "      <td>98.666667</td>\n",
       "      <td>5.650833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015</th>\n",
       "      <td>3209.247706</td>\n",
       "      <td>100.266055</td>\n",
       "      <td>5.423853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016</th>\n",
       "      <td>1739.850000</td>\n",
       "      <td>106.100000</td>\n",
       "      <td>5.730000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1578 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                   投票人数          时长        评分\n",
       "产地   年代                                      \n",
       "中国台湾 1963    121.000000  113.000000  6.400000\n",
       "     1965    153.666667  105.000000  6.800000\n",
       "     1966     51.000000   60.000000  7.900000\n",
       "     1967   4444.000000  112.000000  8.000000\n",
       "     1968     89.000000   83.000000  7.400000\n",
       "...                 ...         ...       ...\n",
       "韩国   2012   5812.542857  100.771429  6.035238\n",
       "     2013  10470.370370   97.731481  6.062037\n",
       "     2014   3776.266667   98.666667  5.650833\n",
       "     2015   3209.247706  100.266055  5.423853\n",
       "     2016   1739.850000  106.100000  5.730000\n",
       "\n",
       "[1578 rows x 3 columns]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby([df[\"产地\"],df[\"年代\"]]).mean() #根据两个变量进行分组"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 获得每个地区，每一年的电影的评分的均值 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "产地    年代  \n",
       "中国台湾  1963    6.400000\n",
       "      1965    6.800000\n",
       "      1966    7.900000\n",
       "      1967    8.000000\n",
       "      1968    7.400000\n",
       "                ...   \n",
       "韩国    2012    6.035238\n",
       "      2013    6.062037\n",
       "      2014    5.650833\n",
       "      2015    5.423853\n",
       "      2016    5.730000\n",
       "Name: 评分, Length: 1578, dtype: float64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group = df[\"评分\"].groupby([df[\"产地\"], df[\"年代\"]])\n",
    "means = group.mean()\n",
    "means"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "产地    年代  \n",
       "中国台湾  1963    6.400000\n",
       "      1965    6.800000\n",
       "      1966    7.900000\n",
       "      1967    8.000000\n",
       "      1968    7.400000\n",
       "                ...   \n",
       "韩国    2012    6.035238\n",
       "      2013    6.062037\n",
       "      2014    5.650833\n",
       "      2015    5.423853\n",
       "      2016    5.730000\n",
       "Name: 评分, Length: 1578, dtype: float64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "means = group = df[\"评分\"].groupby([df[\"产地\"], df[\"年代\"]]).mean()\n",
    "means"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Series通过unstack方法转化为DataFrame\n",
    "\n",
    "### 没有对应数据的（产地, 年代）组合会产生缺失值（NaN）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>产地</th>\n",
       "      <th>中国台湾</th>\n",
       "      <th>中国大陆</th>\n",
       "      <th>中国香港</th>\n",
       "      <th>丹麦</th>\n",
       "      <th>俄罗斯</th>\n",
       "      <th>其他</th>\n",
       "      <th>加拿大</th>\n",
       "      <th>印度</th>\n",
       "      <th>墨西哥</th>\n",
       "      <th>巴西</th>\n",
       "      <th>...</th>\n",
       "      <th>波兰</th>\n",
       "      <th>泰国</th>\n",
       "      <th>澳大利亚</th>\n",
       "      <th>瑞典</th>\n",
       "      <th>美国</th>\n",
       "      <th>英国</th>\n",
       "      <th>荷兰</th>\n",
       "      <th>西班牙</th>\n",
       "      <th>阿根廷</th>\n",
       "      <th>韩国</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>年代</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1888</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.950000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1890</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.800000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1892</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1894</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.450000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1895</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013</th>\n",
       "      <td>7.076471</td>\n",
       "      <td>5.306500</td>\n",
       "      <td>6.105714</td>\n",
       "      <td>6.555556</td>\n",
       "      <td>6.875000</td>\n",
       "      <td>6.853571</td>\n",
       "      <td>6.018182</td>\n",
       "      <td>6.400000</td>\n",
       "      <td>6.983333</td>\n",
       "      <td>8.00</td>\n",
       "      <td>...</td>\n",
       "      <td>6.966667</td>\n",
       "      <td>5.568000</td>\n",
       "      <td>6.76000</td>\n",
       "      <td>7.100</td>\n",
       "      <td>6.308255</td>\n",
       "      <td>7.460140</td>\n",
       "      <td>6.33</td>\n",
       "      <td>6.358333</td>\n",
       "      <td>6.616667</td>\n",
       "      <td>6.062037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014</th>\n",
       "      <td>6.522222</td>\n",
       "      <td>4.963830</td>\n",
       "      <td>5.616667</td>\n",
       "      <td>7.120000</td>\n",
       "      <td>7.175000</td>\n",
       "      <td>6.596250</td>\n",
       "      <td>5.921739</td>\n",
       "      <td>6.374194</td>\n",
       "      <td>7.250000</td>\n",
       "      <td>6.86</td>\n",
       "      <td>...</td>\n",
       "      <td>7.060000</td>\n",
       "      <td>5.653571</td>\n",
       "      <td>6.56875</td>\n",
       "      <td>6.960</td>\n",
       "      <td>6.393056</td>\n",
       "      <td>7.253398</td>\n",
       "      <td>7.30</td>\n",
       "      <td>6.868750</td>\n",
       "      <td>7.150000</td>\n",
       "      <td>5.650833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015</th>\n",
       "      <td>6.576000</td>\n",
       "      <td>4.969189</td>\n",
       "      <td>5.589189</td>\n",
       "      <td>7.166667</td>\n",
       "      <td>7.342857</td>\n",
       "      <td>6.732727</td>\n",
       "      <td>6.018750</td>\n",
       "      <td>6.736364</td>\n",
       "      <td>6.500000</td>\n",
       "      <td>6.76</td>\n",
       "      <td>...</td>\n",
       "      <td>6.300000</td>\n",
       "      <td>5.846667</td>\n",
       "      <td>6.88000</td>\n",
       "      <td>7.625</td>\n",
       "      <td>6.231486</td>\n",
       "      <td>7.123256</td>\n",
       "      <td>6.70</td>\n",
       "      <td>6.514286</td>\n",
       "      <td>7.233333</td>\n",
       "      <td>5.423853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2016</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.712000</td>\n",
       "      <td>5.390909</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.833333</td>\n",
       "      <td>6.200000</td>\n",
       "      <td>6.900000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.522581</td>\n",
       "      <td>7.200000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.730000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.935704</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>127 rows × 25 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "产地        中国台湾      中国大陆      中国香港        丹麦       俄罗斯        其他       加拿大  \\\n",
       "年代                                                                           \n",
       "1888       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
       "1890       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
       "1892       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
       "1894       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
       "1895       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
       "...        ...       ...       ...       ...       ...       ...       ...   \n",
       "2013  7.076471  5.306500  6.105714  6.555556  6.875000  6.853571  6.018182   \n",
       "2014  6.522222  4.963830  5.616667  7.120000  7.175000  6.596250  5.921739   \n",
       "2015  6.576000  4.969189  5.589189  7.166667  7.342857  6.732727  6.018750   \n",
       "2016       NaN  4.712000  5.390909  7.000000       NaN  6.833333  6.200000   \n",
       "2018       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
       "\n",
       "产地          印度       墨西哥    巴西  ...        波兰        泰国     澳大利亚     瑞典  \\\n",
       "年代                              ...                                       \n",
       "1888       NaN       NaN   NaN  ...       NaN       NaN      NaN    NaN   \n",
       "1890       NaN       NaN   NaN  ...       NaN       NaN      NaN    NaN   \n",
       "1892       NaN       NaN   NaN  ...       NaN       NaN      NaN    NaN   \n",
       "1894       NaN       NaN   NaN  ...       NaN       NaN      NaN    NaN   \n",
       "1895       NaN       NaN   NaN  ...       NaN       NaN      NaN    NaN   \n",
       "...        ...       ...   ...  ...       ...       ...      ...    ...   \n",
       "2013  6.400000  6.983333  8.00  ...  6.966667  5.568000  6.76000  7.100   \n",
       "2014  6.374194  7.250000  6.86  ...  7.060000  5.653571  6.56875  6.960   \n",
       "2015  6.736364  6.500000  6.76  ...  6.300000  5.846667  6.88000  7.625   \n",
       "2016  6.900000       NaN   NaN  ...       NaN       NaN      NaN    NaN   \n",
       "2018       NaN       NaN   NaN  ...       NaN       NaN      NaN    NaN   \n",
       "\n",
       "产地          美国        英国    荷兰       西班牙       阿根廷        韩国  \n",
       "年代                                                            \n",
       "1888       NaN  7.950000   NaN       NaN       NaN       NaN  \n",
       "1890  4.800000       NaN   NaN       NaN       NaN       NaN  \n",
       "1892       NaN       NaN   NaN       NaN       NaN       NaN  \n",
       "1894  6.450000       NaN   NaN       NaN       NaN       NaN  \n",
       "1895       NaN       NaN   NaN       NaN       NaN       NaN  \n",
       "...        ...       ...   ...       ...       ...       ...  \n",
       "2013  6.308255  7.460140  6.33  6.358333  6.616667  6.062037  \n",
       "2014  6.393056  7.253398  7.30  6.868750  7.150000  5.650833  \n",
       "2015  6.231486  7.123256  6.70  6.514286  7.233333  5.423853  \n",
       "2016  6.522581  7.200000   NaN       NaN       NaN  5.730000  \n",
       "2018  6.935704       NaN   NaN       NaN       NaN       NaN  \n",
       "\n",
       "[127 rows x 25 columns]"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "means.unstack().T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3 离散化处理 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**在实际的数据分析项目中，对有的数据属性，我们往往并不关注数据的绝对取值，只关心它所处的区间或者等级**\n",
    "\n",
    "**比如，我们可以把评分9分及以上的电影定义为A，7到9分定义为B，5到7分定义为C，3到5分定义为D，小于3分定义为E。**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**离散化也可称为分组、区间化。**\n",
    "\n",
    "Pandas为我们提供了方便的函数cut():\n",
    "\n",
    "pd.cut(x,bins,right = True,labels = None, retbins = False,precision = 3,include_lowest = False) 参数解释：\n",
    "\n",
    "x：需要离散化的数组、Series、DataFrame对象\n",
    "\n",
    "bins：分组的依据，right = True，include_lowest = False，默认左开右闭，可以自己调整。\n",
    "\n",
    "labels：是否要用标记来替换返回出来的数组，retbins：返回x当中每一个值对应的bins的列表，precision精度。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.600000</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1957</td>\n",
       "      <td>美国</td>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1997</td>\n",
       "      <td>意大利</td>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38158</th>\n",
       "      <td>1935</td>\n",
       "      <td>美国</td>\n",
       "      <td>1935年</td>\n",
       "      <td>57</td>\n",
       "      <td>喜剧/歌舞</td>\n",
       "      <td>1935-03-15 00:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>7.600000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38159</th>\n",
       "      <td>1986</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>血溅画屏</td>\n",
       "      <td>95</td>\n",
       "      <td>剧情/悬疑/犯罪/武侠/古装</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>91</td>\n",
       "      <td>7.100000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38160</th>\n",
       "      <td>1986</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>魔窟中的幻想</td>\n",
       "      <td>51</td>\n",
       "      <td>惊悚/恐怖/儿童</td>\n",
       "      <td>1905-06-08 00:00:00</td>\n",
       "      <td>78</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38161</th>\n",
       "      <td>1977</td>\n",
       "      <td>俄罗斯</td>\n",
       "      <td>列宁格勒围困之星火战役 Блокада: Фильм 2: Ленинградский ме...</td>\n",
       "      <td>32</td>\n",
       "      <td>剧情/战争</td>\n",
       "      <td>1905-05-30 00:00:00</td>\n",
       "      <td>97</td>\n",
       "      <td>6.600000</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38162</th>\n",
       "      <td>2018</td>\n",
       "      <td>美国</td>\n",
       "      <td>复仇者联盟3</td>\n",
       "      <td>123456</td>\n",
       "      <td>剧情/科幻</td>\n",
       "      <td>2018-05-04 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>6.935704</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>38163 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         年代    产地                                                 名字    投票人数  \\\n",
       "0      1994    美国                                             肖申克的救赎  692795   \n",
       "1      1957    美国                                               控方证人   42995   \n",
       "2      1997   意大利                                              美丽人生   327855   \n",
       "3      1994    美国                                               阿甘正传  580897   \n",
       "4      1993  中国大陆                                               霸王别姬  478523   \n",
       "...     ...   ...                                                ...     ...   \n",
       "38158  1935    美国                                              1935年      57   \n",
       "38159  1986  中国大陆                                               血溅画屏      95   \n",
       "38160  1986  中国大陆                                             魔窟中的幻想      51   \n",
       "38161  1977   俄罗斯  列宁格勒围困之星火战役 Блокада: Фильм 2: Ленинградский ме...      32   \n",
       "38162  2018    美国                                             复仇者联盟3  123456   \n",
       "\n",
       "                   类型                 上映时间   时长        评分    首映地点 评分等级 热门程度  \n",
       "0               剧情/犯罪  1994-09-10 00:00:00  142  9.600000  多伦多电影节    A    A  \n",
       "1            剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.500000      美国    A    A  \n",
       "2            剧情/喜剧/爱情  1997-12-20 00:00:00  116  9.500000     意大利    A    A  \n",
       "3               剧情/爱情  1994-06-23 00:00:00  142  9.400000   洛杉矶首映    A    A  \n",
       "4            剧情/爱情/同性  1993-01-01 00:00:00  171  9.400000      香港    A    A  \n",
       "...               ...                  ...  ...       ...     ...  ...  ...  \n",
       "38158           喜剧/歌舞  1935-03-15 00:00:00   98  7.600000      美国    B    E  \n",
       "38159  剧情/悬疑/犯罪/武侠/古装  1905-06-08 00:00:00   91  7.100000      美国    B    D  \n",
       "38160        惊悚/恐怖/儿童  1905-06-08 00:00:00   78  8.000000      美国    B    E  \n",
       "38161           剧情/战争  1905-05-30 00:00:00   97  6.600000      美国    C    E  \n",
       "38162           剧情/科幻  2018-05-04 00:00:00  142  6.935704      美国    C    A  \n",
       "\n",
       "[38163 rows x 11 columns]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"评分等级\"] = pd.cut(df[\"评分\"], [0,3,5,7,9,10], labels = ['E','D','C','B','A']) #labels要和区间划分一一对应\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**同样的，我们可以根据投票人数来刻画电影的热门**\n",
    "\n",
    "**投票越多的热门程度越高**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.6</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1957</td>\n",
       "      <td>美国</td>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1997</td>\n",
       "      <td>意大利</td>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.4</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.4</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     年代    产地      名字    投票人数        类型                 上映时间   时长   评分  \\\n",
       "0  1994    美国  肖申克的救赎  692795     剧情/犯罪  1994-09-10 00:00:00  142  9.6   \n",
       "1  1957    美国    控方证人   42995  剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.5   \n",
       "2  1997   意大利   美丽人生   327855  剧情/喜剧/爱情  1997-12-20 00:00:00  116  9.5   \n",
       "3  1994    美国    阿甘正传  580897     剧情/爱情  1994-06-23 00:00:00  142  9.4   \n",
       "4  1993  中国大陆    霸王别姬  478523  剧情/爱情/同性  1993-01-01 00:00:00  171  9.4   \n",
       "\n",
       "     首映地点 评分等级 热门程度  \n",
       "0  多伦多电影节    A    A  \n",
       "1      美国    A    A  \n",
       "2     意大利    A    A  \n",
       "3   洛杉矶首映    A    A  \n",
       "4      香港    A    A  "
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bins = np.percentile(df[\"投票人数\"], [0,20,40,60,80,100]) #获取分位数\n",
    "df[\"热门程度\"] = pd.cut(df[\"投票人数\"],bins,labels = ['E','D','C','B','A'])\n",
    "df[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**大烂片集合：投票人数很多，评分很低**\n",
    "\n",
    "**遗憾的是，我们可以发现，烂片几乎都是中国大陆的**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>623</th>\n",
       "      <td>2011</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>B区</td>\n",
       "      <td>5187</td>\n",
       "      <td>剧情/惊悚/恐怖</td>\n",
       "      <td>2011-06-03 00:00:00</td>\n",
       "      <td>89</td>\n",
       "      <td>2.3</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4167</th>\n",
       "      <td>2014</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>怖偶</td>\n",
       "      <td>4867</td>\n",
       "      <td>悬疑/惊悚</td>\n",
       "      <td>2014-05-07 00:00:00</td>\n",
       "      <td>88</td>\n",
       "      <td>2.8</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5200</th>\n",
       "      <td>2011</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>床下有人</td>\n",
       "      <td>4309</td>\n",
       "      <td>悬疑/惊悚</td>\n",
       "      <td>2011-10-14 00:00:00</td>\n",
       "      <td>100</td>\n",
       "      <td>2.8</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6585</th>\n",
       "      <td>2013</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>帝国秘符</td>\n",
       "      <td>4351</td>\n",
       "      <td>动作/冒险</td>\n",
       "      <td>2013-09-18 00:00:00</td>\n",
       "      <td>93</td>\n",
       "      <td>3.0</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8009</th>\n",
       "      <td>2011</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>飞天</td>\n",
       "      <td>4764</td>\n",
       "      <td>剧情</td>\n",
       "      <td>2011-07-01 00:00:00</td>\n",
       "      <td>115</td>\n",
       "      <td>2.9</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8181</th>\n",
       "      <td>2014</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>分手达人</td>\n",
       "      <td>3937</td>\n",
       "      <td>喜剧/爱情</td>\n",
       "      <td>2014-06-06 00:00:00</td>\n",
       "      <td>90</td>\n",
       "      <td>2.7</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9372</th>\n",
       "      <td>2012</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>孤岛惊魂</td>\n",
       "      <td>2982</td>\n",
       "      <td>悬疑/惊悚/恐怖</td>\n",
       "      <td>2013-01-26 00:00:00</td>\n",
       "      <td>93</td>\n",
       "      <td>2.8</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10275</th>\n",
       "      <td>2013</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>海天盛宴·韦口</td>\n",
       "      <td>3788</td>\n",
       "      <td>情色</td>\n",
       "      <td>2013-10-12 00:00:00</td>\n",
       "      <td>88</td>\n",
       "      <td>2.9</td>\n",
       "      <td>网络</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16512</th>\n",
       "      <td>2013</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>孪生密码</td>\n",
       "      <td>6390</td>\n",
       "      <td>动作/悬疑</td>\n",
       "      <td>2013-11-08 00:00:00</td>\n",
       "      <td>96</td>\n",
       "      <td>2.9</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21189</th>\n",
       "      <td>2010</td>\n",
       "      <td>日本</td>\n",
       "      <td>拳皇</td>\n",
       "      <td>6329</td>\n",
       "      <td>动作/科幻/冒险</td>\n",
       "      <td>2012-10-12 00:00:00</td>\n",
       "      <td>93</td>\n",
       "      <td>3.0</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22348</th>\n",
       "      <td>2013</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>闪魂</td>\n",
       "      <td>3119</td>\n",
       "      <td>惊悚/犯罪</td>\n",
       "      <td>2014-02-21 00:00:00</td>\n",
       "      <td>94</td>\n",
       "      <td>2.6</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22524</th>\n",
       "      <td>2015</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>少年毛泽东</td>\n",
       "      <td>3058</td>\n",
       "      <td>动画/儿童/冒险</td>\n",
       "      <td>2015-04-30 00:00:00</td>\n",
       "      <td>76</td>\n",
       "      <td>2.4</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23754</th>\n",
       "      <td>2013</td>\n",
       "      <td>英国</td>\n",
       "      <td>史前怪兽</td>\n",
       "      <td>3543</td>\n",
       "      <td>动作/惊悚/冒险</td>\n",
       "      <td>2014-01-01 00:00:00</td>\n",
       "      <td>89</td>\n",
       "      <td>3.0</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27832</th>\n",
       "      <td>2011</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>无极限之危情速递</td>\n",
       "      <td>6319</td>\n",
       "      <td>喜剧/动作/爱情/冒险</td>\n",
       "      <td>2011-08-12 00:00:00</td>\n",
       "      <td>94</td>\n",
       "      <td>2.8</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31622</th>\n",
       "      <td>2010</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>异度公寓</td>\n",
       "      <td>3639</td>\n",
       "      <td>惊悚</td>\n",
       "      <td>2010-06-04 00:00:00</td>\n",
       "      <td>93</td>\n",
       "      <td>2.7</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32007</th>\n",
       "      <td>2014</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>英雄之战</td>\n",
       "      <td>8359</td>\n",
       "      <td>动作/爱情</td>\n",
       "      <td>2014-03-21 00:00:00</td>\n",
       "      <td>90</td>\n",
       "      <td>3.0</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32180</th>\n",
       "      <td>2013</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>咏春小龙</td>\n",
       "      <td>8861</td>\n",
       "      <td>剧情/动作</td>\n",
       "      <td>2013-07-20 00:00:00</td>\n",
       "      <td>90</td>\n",
       "      <td>3.0</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32990</th>\n",
       "      <td>2014</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>再爱一次好不好</td>\n",
       "      <td>6999</td>\n",
       "      <td>喜剧/爱情</td>\n",
       "      <td>2014-04-11 00:00:00</td>\n",
       "      <td>94</td>\n",
       "      <td>3.0</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38090</th>\n",
       "      <td>2014</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>大话天仙</td>\n",
       "      <td>21629</td>\n",
       "      <td>喜剧/奇幻/古装</td>\n",
       "      <td>2014-02-02 00:00:00</td>\n",
       "      <td>91</td>\n",
       "      <td>3.0</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38092</th>\n",
       "      <td>2013</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>天机·富春山居图</td>\n",
       "      <td>74709</td>\n",
       "      <td>动作/冒险</td>\n",
       "      <td>2013-06-09 00:00:00</td>\n",
       "      <td>122</td>\n",
       "      <td>2.9</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38093</th>\n",
       "      <td>2014</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>特工艾米拉</td>\n",
       "      <td>10852</td>\n",
       "      <td>动作/悬疑</td>\n",
       "      <td>2014-04-11 00:00:00</td>\n",
       "      <td>96</td>\n",
       "      <td>2.7</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38097</th>\n",
       "      <td>2015</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>汽车人总动员</td>\n",
       "      <td>12892</td>\n",
       "      <td>喜剧/动画/冒险</td>\n",
       "      <td>2015-07-03 00:00:00</td>\n",
       "      <td>85</td>\n",
       "      <td>2.3</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38102</th>\n",
       "      <td>2016</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>2016年中央电视台春节</td>\n",
       "      <td>17328</td>\n",
       "      <td>歌舞/真人秀</td>\n",
       "      <td>2016-02-07 00:00:00</td>\n",
       "      <td>280</td>\n",
       "      <td>2.3</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38108</th>\n",
       "      <td>2014</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>放手爱</td>\n",
       "      <td>29254</td>\n",
       "      <td>喜剧/爱情</td>\n",
       "      <td>2014-04-30 00:00:00</td>\n",
       "      <td>93</td>\n",
       "      <td>2.3</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>E</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         年代    产地            名字   投票人数           类型                 上映时间   时长  \\\n",
       "623    2011  中国大陆            B区   5187     剧情/惊悚/恐怖  2011-06-03 00:00:00   89   \n",
       "4167   2014  中国大陆            怖偶   4867        悬疑/惊悚  2014-05-07 00:00:00   88   \n",
       "5200   2011  中国大陆          床下有人   4309        悬疑/惊悚  2011-10-14 00:00:00  100   \n",
       "6585   2013  中国大陆          帝国秘符   4351        动作/冒险  2013-09-18 00:00:00   93   \n",
       "8009   2011  中国大陆            飞天   4764           剧情  2011-07-01 00:00:00  115   \n",
       "8181   2014  中国大陆          分手达人   3937        喜剧/爱情  2014-06-06 00:00:00   90   \n",
       "9372   2012  中国大陆          孤岛惊魂   2982     悬疑/惊悚/恐怖  2013-01-26 00:00:00   93   \n",
       "10275  2013  中国大陆       海天盛宴·韦口   3788           情色  2013-10-12 00:00:00   88   \n",
       "16512  2013  中国大陆          孪生密码   6390        动作/悬疑  2013-11-08 00:00:00   96   \n",
       "21189  2010    日本            拳皇   6329     动作/科幻/冒险  2012-10-12 00:00:00   93   \n",
       "22348  2013  中国大陆            闪魂   3119        惊悚/犯罪  2014-02-21 00:00:00   94   \n",
       "22524  2015  中国大陆         少年毛泽东   3058     动画/儿童/冒险  2015-04-30 00:00:00   76   \n",
       "23754  2013    英国          史前怪兽   3543     动作/惊悚/冒险  2014-01-01 00:00:00   89   \n",
       "27832  2011  中国大陆      无极限之危情速递   6319  喜剧/动作/爱情/冒险  2011-08-12 00:00:00   94   \n",
       "31622  2010  中国大陆          异度公寓   3639           惊悚  2010-06-04 00:00:00   93   \n",
       "32007  2014  中国大陆          英雄之战   8359        动作/爱情  2014-03-21 00:00:00   90   \n",
       "32180  2013  中国大陆          咏春小龙   8861        剧情/动作  2013-07-20 00:00:00   90   \n",
       "32990  2014  中国大陆       再爱一次好不好   6999        喜剧/爱情  2014-04-11 00:00:00   94   \n",
       "38090  2014  中国大陆          大话天仙  21629     喜剧/奇幻/古装  2014-02-02 00:00:00   91   \n",
       "38092  2013  中国大陆      天机·富春山居图  74709        动作/冒险  2013-06-09 00:00:00  122   \n",
       "38093  2014  中国大陆         特工艾米拉  10852        动作/悬疑  2014-04-11 00:00:00   96   \n",
       "38097  2015  中国大陆        汽车人总动员  12892     喜剧/动画/冒险  2015-07-03 00:00:00   85   \n",
       "38102  2016  中国大陆  2016年中央电视台春节  17328       歌舞/真人秀  2016-02-07 00:00:00  280   \n",
       "38108  2014  中国大陆           放手爱  29254        喜剧/爱情  2014-04-30 00:00:00   93   \n",
       "\n",
       "        评分  首映地点 评分等级 热门程度  \n",
       "623    2.3  中国大陆    E    A  \n",
       "4167   2.8  中国大陆    E    A  \n",
       "5200   2.8  中国大陆    E    A  \n",
       "6585   3.0  中国大陆    E    A  \n",
       "8009   2.9  中国大陆    E    A  \n",
       "8181   2.7  中国大陆    E    A  \n",
       "9372   2.8  中国大陆    E    A  \n",
       "10275  2.9    网络    E    A  \n",
       "16512  2.9  中国大陆    E    A  \n",
       "21189  3.0  中国大陆    E    A  \n",
       "22348  2.6  中国大陆    E    A  \n",
       "22524  2.4  中国大陆    E    A  \n",
       "23754  3.0  中国大陆    E    A  \n",
       "27832  2.8  中国大陆    E    A  \n",
       "31622  2.7  中国大陆    E    A  \n",
       "32007  3.0  中国大陆    E    A  \n",
       "32180  3.0  中国大陆    E    A  \n",
       "32990  3.0  中国大陆    E    A  \n",
       "38090  3.0  中国大陆    E    A  \n",
       "38092  2.9  中国大陆    E    A  \n",
       "38093  2.7  中国大陆    E    A  \n",
       "38097  2.3  中国大陆    E    A  \n",
       "38102  2.3  中国大陆    E    A  \n",
       "38108  2.3  中国大陆    E    A  "
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Popular yet poorly rated titles: popularity tier 'A' together with rating tier 'E'.\n",
    "df.loc[df['热门程度'].eq('A') & df['评分等级'].eq('E')]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 冷门高分电影"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>563</th>\n",
       "      <td>2011</td>\n",
       "      <td>英国</td>\n",
       "      <td>BBC喜剧音</td>\n",
       "      <td>38</td>\n",
       "      <td>喜剧/音乐/歌舞</td>\n",
       "      <td>2011-08-13 00:00:00</td>\n",
       "      <td>95</td>\n",
       "      <td>9.3</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>895</th>\n",
       "      <td>2014</td>\n",
       "      <td>日本</td>\n",
       "      <td>JOJO的奇妙冒险 特别见面会 Walk Like Crusade</td>\n",
       "      <td>36</td>\n",
       "      <td>纪录片</td>\n",
       "      <td>2014-10-26 00:00:00</td>\n",
       "      <td>137</td>\n",
       "      <td>9.3</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1099</th>\n",
       "      <td>2012</td>\n",
       "      <td>英国</td>\n",
       "      <td>Pond一家最</td>\n",
       "      <td>45</td>\n",
       "      <td>纪录片</td>\n",
       "      <td>2012-09-29 00:00:00</td>\n",
       "      <td>12</td>\n",
       "      <td>9.2</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1540</th>\n",
       "      <td>2007</td>\n",
       "      <td>英国</td>\n",
       "      <td>阿森纳：温格的十一人</td>\n",
       "      <td>74</td>\n",
       "      <td>运动</td>\n",
       "      <td>2007-10-22 00:00:00</td>\n",
       "      <td>78</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1547</th>\n",
       "      <td>2009</td>\n",
       "      <td>英国</td>\n",
       "      <td>阿斯加德远征</td>\n",
       "      <td>59</td>\n",
       "      <td>纪录片</td>\n",
       "      <td>2011-09-17 00:00:00</td>\n",
       "      <td>85</td>\n",
       "      <td>9.3</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36846</th>\n",
       "      <td>2012</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>末了，未了</td>\n",
       "      <td>34</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>2012-12-16 00:00:00</td>\n",
       "      <td>90</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37000</th>\n",
       "      <td>2015</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>身经百战</td>\n",
       "      <td>74</td>\n",
       "      <td>纪录片</td>\n",
       "      <td>2015-03-24 00:00:00</td>\n",
       "      <td>91</td>\n",
       "      <td>9.1</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37033</th>\n",
       "      <td>1986</td>\n",
       "      <td>英国</td>\n",
       "      <td>歌唱神探</td>\n",
       "      <td>36</td>\n",
       "      <td>剧情/悬疑/歌舞</td>\n",
       "      <td>1986-11-16 00:00:00</td>\n",
       "      <td>415</td>\n",
       "      <td>9.1</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37557</th>\n",
       "      <td>1975</td>\n",
       "      <td>美国</td>\n",
       "      <td>山那边</td>\n",
       "      <td>70</td>\n",
       "      <td>剧情</td>\n",
       "      <td>1975-11-14 00:00:00</td>\n",
       "      <td>103</td>\n",
       "      <td>9.1</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37883</th>\n",
       "      <td>2015</td>\n",
       "      <td>美国</td>\n",
       "      <td>奎</td>\n",
       "      <td>62</td>\n",
       "      <td>纪录片/短片</td>\n",
       "      <td>2015-08-19 00:00:00</td>\n",
       "      <td>9</td>\n",
       "      <td>9.1</td>\n",
       "      <td>纽约电影论坛</td>\n",
       "      <td>A</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>177 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         年代    产地                                 名字  投票人数        类型  \\\n",
       "563    2011    英国                             BBC喜剧音    38  喜剧/音乐/歌舞   \n",
       "895    2014    日本  JOJO的奇妙冒险 特别见面会 Walk Like Crusade    36       纪录片   \n",
       "1099   2012    英国                            Pond一家最    45       纪录片   \n",
       "1540   2007    英国                        阿森纳：温格的十一人     74        运动   \n",
       "1547   2009    英国                             阿斯加德远征    59       纪录片   \n",
       "...     ...   ...                                ...   ...       ...   \n",
       "36846  2012  中国大陆                              末了，未了    34  剧情/喜剧/爱情   \n",
       "37000  2015  中国大陆                               身经百战    74       纪录片   \n",
       "37033  1986    英国                               歌唱神探    36  剧情/悬疑/歌舞   \n",
       "37557  1975    美国                                山那边    70        剧情   \n",
       "37883  2015    美国                                  奎    62    纪录片/短片   \n",
       "\n",
       "                      上映时间   时长   评分    首映地点 评分等级 热门程度  \n",
       "563    2011-08-13 00:00:00   95  9.3      美国    A    E  \n",
       "895    2014-10-26 00:00:00  137  9.3      美国    A    E  \n",
       "1099   2012-09-29 00:00:00   12  9.2      美国    A    E  \n",
       "1540   2007-10-22 00:00:00   78  9.5      美国    A    E  \n",
       "1547   2011-09-17 00:00:00   85  9.3      美国    A    E  \n",
       "...                    ...  ...  ...     ...  ...  ...  \n",
       "36846  2012-12-16 00:00:00   90  9.5      美国    A    E  \n",
       "37000  2015-03-24 00:00:00   91  9.1      美国    A    E  \n",
       "37033  1986-11-16 00:00:00  415  9.1      美国    A    E  \n",
       "37557  1975-11-14 00:00:00  103  9.1      美国    A    E  \n",
       "37883  2015-08-19 00:00:00    9  9.1  纽约电影论坛    A    E  \n",
       "\n",
       "[177 rows x 11 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Little-known but highly rated titles: popularity tier 'E' together with rating tier 'A'.\n",
    "df.loc[df['热门程度'].eq('E') & df['评分等级'].eq('A')]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 将处理后的数据进行保存 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the processed frame (index included) to an Excel workbook at a path relative to the working directory.\n",
    "df.to_excel(excel_writer=\"movie_data3.xlsx\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.4 合并数据集 "
   ]
  },
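  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （ 1 ）append \n",
    "先把数据集拆分为多个，再进行合并。\n",
    "\n",
    "注意：`DataFrame.append` 自 pandas 1.4 起已弃用，并在 pandas 2.0 中被移除；在新版本中请使用 `pd.concat([df1, df2])` 实现相同的追加效果。"
   ]
  },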
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the movies into two subsets by the value of the 产地 (production region) column.\n",
    "df_usa = df.loc[df['产地'] == '美国']\n",
    "df_china = df.loc[df['产地'] == '中国大陆']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.400000</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1961</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>大闹天宫</td>\n",
       "      <td>74881</td>\n",
       "      <td>动画/奇幻</td>\n",
       "      <td>1905-05-14 00:00:00</td>\n",
       "      <td>114</td>\n",
       "      <td>9.200000</td>\n",
       "      <td>上集</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>2015</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>穹顶之下</td>\n",
       "      <td>51113</td>\n",
       "      <td>纪录片</td>\n",
       "      <td>2015-02-28 00:00:00</td>\n",
       "      <td>104</td>\n",
       "      <td>9.200000</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>1982</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>茶馆</td>\n",
       "      <td>10678</td>\n",
       "      <td>剧情/历史</td>\n",
       "      <td>1905-06-04 00:00:00</td>\n",
       "      <td>118</td>\n",
       "      <td>9.200000</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>1988</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>山水情</td>\n",
       "      <td>10781</td>\n",
       "      <td>动画/短片</td>\n",
       "      <td>1905-06-10 00:00:00</td>\n",
       "      <td>19</td>\n",
       "      <td>9.200000</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38151</th>\n",
       "      <td>1987</td>\n",
       "      <td>美国</td>\n",
       "      <td>零下的激情</td>\n",
       "      <td>199</td>\n",
       "      <td>剧情/爱情/犯罪</td>\n",
       "      <td>1987-11-06 00:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>7.400000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38153</th>\n",
       "      <td>1986</td>\n",
       "      <td>美国</td>\n",
       "      <td>离别秋波</td>\n",
       "      <td>240</td>\n",
       "      <td>剧情/爱情/音乐</td>\n",
       "      <td>1986-02-19 00:00:00</td>\n",
       "      <td>90</td>\n",
       "      <td>8.200000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38156</th>\n",
       "      <td>1986</td>\n",
       "      <td>美国</td>\n",
       "      <td>极乐森林</td>\n",
       "      <td>45</td>\n",
       "      <td>纪录片</td>\n",
       "      <td>1986-09-14 00:00:00</td>\n",
       "      <td>90</td>\n",
       "      <td>8.100000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38158</th>\n",
       "      <td>1935</td>\n",
       "      <td>美国</td>\n",
       "      <td>1935年</td>\n",
       "      <td>57</td>\n",
       "      <td>喜剧/歌舞</td>\n",
       "      <td>1935-03-15 00:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>7.600000</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38162</th>\n",
       "      <td>2018</td>\n",
       "      <td>美国</td>\n",
       "      <td>复仇者联盟3</td>\n",
       "      <td>123456</td>\n",
       "      <td>剧情/科幻</td>\n",
       "      <td>2018-05-04 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>6.935704</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>15505 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         年代    产地      名字    投票人数        类型                 上映时间   时长  \\\n",
       "4      1993  中国大陆    霸王别姬  478523  剧情/爱情/同性  1993-01-01 00:00:00  171   \n",
       "21     1961  中国大陆    大闹天宫   74881     动画/奇幻  1905-05-14 00:00:00  114   \n",
       "29     2015  中国大陆    穹顶之下   51113       纪录片  2015-02-28 00:00:00  104   \n",
       "38     1982  中国大陆      茶馆   10678     剧情/历史  1905-06-04 00:00:00  118   \n",
       "45     1988  中国大陆     山水情   10781     动画/短片  1905-06-10 00:00:00   19   \n",
       "...     ...   ...     ...     ...       ...                  ...  ...   \n",
       "38151  1987    美国   零下的激情     199  剧情/爱情/犯罪  1987-11-06 00:00:00   98   \n",
       "38153  1986    美国    离别秋波     240  剧情/爱情/音乐  1986-02-19 00:00:00   90   \n",
       "38156  1986    美国    极乐森林      45       纪录片  1986-09-14 00:00:00   90   \n",
       "38158  1935    美国   1935年      57     喜剧/歌舞  1935-03-15 00:00:00   98   \n",
       "38162  2018    美国  复仇者联盟3  123456     剧情/科幻  2018-05-04 00:00:00  142   \n",
       "\n",
       "             评分  首映地点 评分等级 热门程度  \n",
       "4      9.400000    香港    A    A  \n",
       "21     9.200000    上集    A    A  \n",
       "29     9.200000  中国大陆    A    A  \n",
       "38     9.200000    美国    A    A  \n",
       "45     9.200000    美国    A    A  \n",
       "...         ...   ...  ...  ...  \n",
       "38151  7.400000    美国    B    D  \n",
       "38153  8.200000    美国    B    C  \n",
       "38156  8.100000    美国    B    E  \n",
       "38158  7.600000    美国    B    E  \n",
       "38162  6.935704    美国    C    A  \n",
       "\n",
       "[15505 rows x 11 columns]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the supported equivalent.\n",
    "# Rows of df_usa are stacked directly below df_china; this works best when both frames share the same columns.\n",
    "pd.concat([df_china, df_usa])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "将这两个数据集进行合并"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （ 2 ）merge "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```python\n",
    "pd.merge(left, right, how = 'inner', on = None, left_on = None, right_on = None,\n",
    "    left_index = False, right_index = False, sort = False,\n",
    "    suffixes = ('_x', '_y'), copy = True, indicator = False, validate=None) \n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "left : DataFrame\n",
    "\n",
    "right : DataFrame or named Series\n",
    "    Object to merge with.\n",
    "\n",
    "how : {'left', 'right', 'outer', 'inner'}, default 'inner'\n",
    "    Type of merge to be performed.\n",
    "\n",
    "    * left: use only keys from left frame, similar to a SQL left outer join;\n",
    "      preserve key order.\n",
    "    * right: use only keys from right frame, similar to a SQL right outer join;\n",
    "      preserve key order.\n",
    "    * outer: use union of keys from both frames, similar to a SQL full outer\n",
    "      join; sort keys lexicographically.\n",
    "    * inner: use intersection of keys from both frames, similar to a SQL inner\n",
    "      join; preserve the order of the left keys.\n",
    "      \n",
    "on : label or list\n",
    "    Column or index level names to join on. These must be found in both\n",
    "    DataFrames. If `on` is None and not merging on indexes then this defaults\n",
    "    to the intersection of the columns in both DataFrames.\n",
    "    \n",
    "left_on : label or list, or array-like\n",
    "    Column or index level names to join on in the left DataFrame. Can also\n",
    "    be an array or list of arrays of the length of the left DataFrame.\n",
    "    These arrays are treated as if they are columns.\n",
    "    \n",
    "right_on : label or list, or array-like\n",
    "    Column or index level names to join on in the right DataFrame. Can also\n",
    "    be an array or list of arrays of the length of the right DataFrame.\n",
    "    These arrays are treated as if they are columns.\n",
    "    \n",
    "left_index : bool, default False\n",
    "    Use the index from the left DataFrame as the join key(s). If it is a\n",
    "    MultiIndex, the number of keys in the other DataFrame (either the index\n",
    "    or a number of columns) must match the number of levels.\n",
    "    \n",
    "right_index : bool, default False\n",
    "    Use the index from the right DataFrame as the join key. Same caveats as\n",
    "    left_index.\n",
    "    \n",
    "sort : bool, default False\n",
    "    Sort the join keys lexicographically in the result DataFrame. If False,\n",
    "    the order of the join keys depends on the join type (how keyword).\n",
    "    \n",
    "suffixes : tuple of (str, str), default ('_x', '_y')\n",
    "    Suffix to apply to overlapping column names in the left and right\n",
    "    side, respectively. To raise an exception on overlapping columns use\n",
    "    (False, False).\n",
    "    \n",
    "copy : bool, default True\n",
    "    If False, avoid copy if possible.\n",
    "    \n",
    "indicator : bool or str, default False\n",
    "    If True, adds a column to output DataFrame called \"_merge\" with\n",
    "    information on the source of each row.\n",
    "    If string, column with information on source of each row will be added to\n",
    "    output DataFrame, and column will be named value of string.\n",
    "    Information column is Categorical-type and takes on a value of \"left_only\"\n",
    "    for observations whose merge key only appears in 'left' DataFrame,\n",
    "    \"right_only\" for observations whose merge key only appears in 'right'\n",
    "    DataFrame, and \"both\" if the observation's merge key is found in both.\n",
    "\n",
    "validate : str, optional\n",
    "    If specified, checks if merge is of specified type.\n",
    "\n",
    "    * \"one_to_one\" or \"1:1\": check if merge keys are unique in both\n",
    "      left and right datasets.\n",
    "    * \"one_to_many\" or \"1:m\": check if merge keys are unique in left\n",
    "      dataset.\n",
    "    * \"many_to_one\" or \"m:1\": check if merge keys are unique in right\n",
    "      dataset.\n",
    "    * \"many_to_many\" or \"m:m\": allowed, but does not result in checks.\n",
    "\n",
    "    .. versionadded:: 0.21.0\n",
    "\n",
    "Returns\n",
    "-------\n",
    "DataFrame\n",
    "    A DataFrame of the two merged objects.\n",
    "\n",
    "See Also\n",
    "--------\n",
    "merge_ordered : Merge with optional filling/interpolation.\n",
    "merge_asof : Merge on nearest keys.\n",
    "DataFrame.join : Similar method using indices.\n",
    "\n",
    "Notes\n",
    "-----\n",
    "Support for specifying index levels as the `on`, `left_on`, and\n",
    "`right_on` parameters was added in version 0.23.0\n",
    "Support for merging named Series objects was added in version 0.24.0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 我们选取6部热门电影"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.6</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1957</td>\n",
       "      <td>美国</td>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1997</td>\n",
       "      <td>意大利</td>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.4</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.4</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2012</td>\n",
       "      <td>美国</td>\n",
       "      <td>泰坦尼克号</td>\n",
       "      <td>157074</td>\n",
       "      <td>剧情/爱情/灾难</td>\n",
       "      <td>2012-04-10 00:00:00</td>\n",
       "      <td>194</td>\n",
       "      <td>9.4</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     年代    产地      名字    投票人数        类型                 上映时间   时长   评分  \\\n",
       "0  1994    美国  肖申克的救赎  692795     剧情/犯罪  1994-09-10 00:00:00  142  9.6   \n",
       "1  1957    美国    控方证人   42995  剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.5   \n",
       "2  1997   意大利   美丽人生   327855  剧情/喜剧/爱情  1997-12-20 00:00:00  116  9.5   \n",
       "3  1994    美国    阿甘正传  580897     剧情/爱情  1994-06-23 00:00:00  142  9.4   \n",
       "4  1993  中国大陆    霸王别姬  478523  剧情/爱情/同性  1993-01-01 00:00:00  171  9.4   \n",
       "5  2012    美国  泰坦尼克号   157074  剧情/爱情/灾难  2012-04-10 00:00:00  194  9.4   \n",
       "\n",
       "     首映地点 评分等级 热门程度  \n",
       "0  多伦多电影节    A    A  \n",
       "1      美国    A    A  \n",
       "2     意大利    A    A  \n",
       "3   洛杉矶首映    A    A  \n",
       "4      香港    A    A  \n",
       "5    中国大陆    A    A  "
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .loc slices by label and includes the end label, so this keeps every row up to and including label 5.\n",
    "df1 = df.loc[:5, :]\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select rows and columns in one .loc call (no chained indexing) and take an explicit copy,\n",
    "# so adding a column below can never write into, or warn about, a slice of df.\n",
    "df2 = df.loc[:5, [\"名字\", \"产地\"]].copy()\n",
    "# Demo values for the new 票房 (box office) column, one per selected row.\n",
    "df2[\"票房\"] = [123344,23454,55556,333,6666,444]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>名字</th>\n",
       "      <th>产地</th>\n",
       "      <th>票房</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>美国</td>\n",
       "      <td>123344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>控方证人</td>\n",
       "      <td>美国</td>\n",
       "      <td>23454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>美丽人生</td>\n",
       "      <td>意大利</td>\n",
       "      <td>55556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>美国</td>\n",
       "      <td>333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>6666</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>泰坦尼克号</td>\n",
       "      <td>美国</td>\n",
       "      <td>444</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       名字    产地      票房\n",
       "0  肖申克的救赎    美国  123344\n",
       "1    控方证人    美国   23454\n",
       "2   美丽人生    意大利   55556\n",
       "3    阿甘正传    美国     333\n",
       "4    霸王别姬  中国大陆    6666\n",
       "5  泰坦尼克号     美国     444"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = df2.sample(frac = 1) #打乱数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>名字</th>\n",
       "      <th>产地</th>\n",
       "      <th>票房</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>美国</td>\n",
       "      <td>123344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>泰坦尼克号</td>\n",
       "      <td>美国</td>\n",
       "      <td>444</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>美国</td>\n",
       "      <td>333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>美丽人生</td>\n",
       "      <td>意大利</td>\n",
       "      <td>55556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>6666</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>控方证人</td>\n",
       "      <td>美国</td>\n",
       "      <td>23454</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       名字    产地      票房\n",
       "0  肖申克的救赎    美国  123344\n",
       "1  泰坦尼克号     美国     444\n",
       "2    阿甘正传    美国     333\n",
       "3   美丽人生    意大利   55556\n",
       "4    霸王别姬  中国大陆    6666\n",
       "5    控方证人    美国   23454"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.index = range(len(df2))\n",
    "df2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "现在，我们需要把df1和df2合并"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "我们发现，df2有票房数据，df1有评分等其他信息  \n",
    "由于样本的顺序不一致，因此不能直接采取直接复制的方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地_x</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "      <th>产地_y</th>\n",
       "      <th>票房</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.6</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>美国</td>\n",
       "      <td>123344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1957</td>\n",
       "      <td>美国</td>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>美国</td>\n",
       "      <td>23454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1997</td>\n",
       "      <td>意大利</td>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>意大利</td>\n",
       "      <td>55556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.4</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>美国</td>\n",
       "      <td>333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.4</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>6666</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2012</td>\n",
       "      <td>美国</td>\n",
       "      <td>泰坦尼克号</td>\n",
       "      <td>157074</td>\n",
       "      <td>剧情/爱情/灾难</td>\n",
       "      <td>2012-04-10 00:00:00</td>\n",
       "      <td>194</td>\n",
       "      <td>9.4</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>美国</td>\n",
       "      <td>444</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     年代  产地_x      名字    投票人数        类型                 上映时间   时长   评分  \\\n",
       "0  1994    美国  肖申克的救赎  692795     剧情/犯罪  1994-09-10 00:00:00  142  9.6   \n",
       "1  1957    美国    控方证人   42995  剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.5   \n",
       "2  1997   意大利   美丽人生   327855  剧情/喜剧/爱情  1997-12-20 00:00:00  116  9.5   \n",
       "3  1994    美国    阿甘正传  580897     剧情/爱情  1994-06-23 00:00:00  142  9.4   \n",
       "4  1993  中国大陆    霸王别姬  478523  剧情/爱情/同性  1993-01-01 00:00:00  171  9.4   \n",
       "5  2012    美国  泰坦尼克号   157074  剧情/爱情/灾难  2012-04-10 00:00:00  194  9.4   \n",
       "\n",
       "     首映地点 评分等级 热门程度  产地_y      票房  \n",
       "0  多伦多电影节    A    A    美国  123344  \n",
       "1      美国    A    A    美国   23454  \n",
       "2     意大利    A    A   意大利   55556  \n",
       "3   洛杉矶首映    A    A    美国     333  \n",
       "4      香港    A    A  中国大陆    6666  \n",
       "5    中国大陆    A    A    美国     444  "
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(df1, df2, how = \"inner\", on = \"名字\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "由于两个数据集都存在产地，因此合并后会有两个产地信息"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （ 3 ）concat\n",
    "将多个数据集进行批量合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年代</th>\n",
       "      <th>产地</th>\n",
       "      <th>名字</th>\n",
       "      <th>投票人数</th>\n",
       "      <th>类型</th>\n",
       "      <th>上映时间</th>\n",
       "      <th>时长</th>\n",
       "      <th>评分</th>\n",
       "      <th>首映地点</th>\n",
       "      <th>评分等级</th>\n",
       "      <th>热门程度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>肖申克的救赎</td>\n",
       "      <td>692795</td>\n",
       "      <td>剧情/犯罪</td>\n",
       "      <td>1994-09-10 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.6</td>\n",
       "      <td>多伦多电影节</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1957</td>\n",
       "      <td>美国</td>\n",
       "      <td>控方证人</td>\n",
       "      <td>42995</td>\n",
       "      <td>剧情/悬疑/犯罪</td>\n",
       "      <td>1957-12-17 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1997</td>\n",
       "      <td>意大利</td>\n",
       "      <td>美丽人生</td>\n",
       "      <td>327855</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>1997-12-20 00:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>9.5</td>\n",
       "      <td>意大利</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1994</td>\n",
       "      <td>美国</td>\n",
       "      <td>阿甘正传</td>\n",
       "      <td>580897</td>\n",
       "      <td>剧情/爱情</td>\n",
       "      <td>1994-06-23 00:00:00</td>\n",
       "      <td>142</td>\n",
       "      <td>9.4</td>\n",
       "      <td>洛杉矶首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1993</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>霸王别姬</td>\n",
       "      <td>478523</td>\n",
       "      <td>剧情/爱情/同性</td>\n",
       "      <td>1993-01-01 00:00:00</td>\n",
       "      <td>171</td>\n",
       "      <td>9.4</td>\n",
       "      <td>香港</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2012</td>\n",
       "      <td>美国</td>\n",
       "      <td>泰坦尼克号</td>\n",
       "      <td>157074</td>\n",
       "      <td>剧情/爱情/灾难</td>\n",
       "      <td>2012-04-10 00:00:00</td>\n",
       "      <td>194</td>\n",
       "      <td>9.4</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1993</td>\n",
       "      <td>美国</td>\n",
       "      <td>辛德勒的名单</td>\n",
       "      <td>306904</td>\n",
       "      <td>剧情/历史/战争</td>\n",
       "      <td>1993-11-30 00:00:00</td>\n",
       "      <td>195</td>\n",
       "      <td>9.4</td>\n",
       "      <td>华盛顿首映</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1997</td>\n",
       "      <td>日本</td>\n",
       "      <td>新世纪福音战士剧场版：Air/真心为你 新世紀エヴァンゲリオン劇場版 Ai</td>\n",
       "      <td>24355</td>\n",
       "      <td>剧情/动作/科幻/动画/奇幻</td>\n",
       "      <td>1997-07-19 00:00:00</td>\n",
       "      <td>87</td>\n",
       "      <td>9.4</td>\n",
       "      <td>日本</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2013</td>\n",
       "      <td>日本</td>\n",
       "      <td>银魂完结篇：直到永远的万事屋 劇場版 銀魂 完結篇 万事屋よ</td>\n",
       "      <td>21513</td>\n",
       "      <td>剧情/动画</td>\n",
       "      <td>2013-07-06 00:00:00</td>\n",
       "      <td>110</td>\n",
       "      <td>9.4</td>\n",
       "      <td>日本</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1994</td>\n",
       "      <td>法国</td>\n",
       "      <td>这个杀手不太冷</td>\n",
       "      <td>662552</td>\n",
       "      <td>剧情/动作/犯罪</td>\n",
       "      <td>1994-09-14 00:00:00</td>\n",
       "      <td>133</td>\n",
       "      <td>9.4</td>\n",
       "      <td>法国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>1993</td>\n",
       "      <td>韩国</td>\n",
       "      <td>101</td>\n",
       "      <td>146</td>\n",
       "      <td>喜剧/爱情</td>\n",
       "      <td>1993-06-19 00:00:00</td>\n",
       "      <td>112</td>\n",
       "      <td>7.4</td>\n",
       "      <td>韩国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>1995</td>\n",
       "      <td>英国</td>\n",
       "      <td>10</td>\n",
       "      <td>186</td>\n",
       "      <td>喜剧</td>\n",
       "      <td>1995-01-25 00:00:00</td>\n",
       "      <td>101</td>\n",
       "      <td>7.4</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>2013</td>\n",
       "      <td>韩国</td>\n",
       "      <td>素媛</td>\n",
       "      <td>114819</td>\n",
       "      <td>剧情/家庭</td>\n",
       "      <td>2013-10-02 00:00:00</td>\n",
       "      <td>123</td>\n",
       "      <td>9.1</td>\n",
       "      <td>韩国</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>2003</td>\n",
       "      <td>美国</td>\n",
       "      <td>101忠狗续集：伦敦</td>\n",
       "      <td>924</td>\n",
       "      <td>喜剧/动画/家庭</td>\n",
       "      <td>2003-01-21 00:00:00</td>\n",
       "      <td>70</td>\n",
       "      <td>7.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>104</th>\n",
       "      <td>2000</td>\n",
       "      <td>美国</td>\n",
       "      <td>10</td>\n",
       "      <td>9514</td>\n",
       "      <td>喜剧/家庭</td>\n",
       "      <td>2000-09-22 00:00:00</td>\n",
       "      <td>100</td>\n",
       "      <td>7.0</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105</th>\n",
       "      <td>2013</td>\n",
       "      <td>韩国</td>\n",
       "      <td>10</td>\n",
       "      <td>601</td>\n",
       "      <td>剧情</td>\n",
       "      <td>2014-04-24 00:00:00</td>\n",
       "      <td>93</td>\n",
       "      <td>7.2</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>2006</td>\n",
       "      <td>美国</td>\n",
       "      <td>10件或</td>\n",
       "      <td>1770</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>2006-12-01 00:00:00</td>\n",
       "      <td>82</td>\n",
       "      <td>7.7</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>2014</td>\n",
       "      <td>美国</td>\n",
       "      <td>10年</td>\n",
       "      <td>1531</td>\n",
       "      <td>喜剧/同性</td>\n",
       "      <td>2015-06-02 00:00:00</td>\n",
       "      <td>90</td>\n",
       "      <td>6.9</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>108</th>\n",
       "      <td>2012</td>\n",
       "      <td>日本</td>\n",
       "      <td>11·25自决之日 三岛由纪夫与年轻人们 11・25自決の</td>\n",
       "      <td>149</td>\n",
       "      <td>剧情</td>\n",
       "      <td>2012-06-02 00:00:00</td>\n",
       "      <td>119</td>\n",
       "      <td>5.6</td>\n",
       "      <td>日本</td>\n",
       "      <td>C</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>1997</td>\n",
       "      <td>美国</td>\n",
       "      <td>泰坦尼克号</td>\n",
       "      <td>535491</td>\n",
       "      <td>剧情/爱情/灾难</td>\n",
       "      <td>1998-04-03 00:00:00</td>\n",
       "      <td>194</td>\n",
       "      <td>9.1</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200</th>\n",
       "      <td>2014</td>\n",
       "      <td>日本</td>\n",
       "      <td>最完美的离婚 2014特别篇</td>\n",
       "      <td>18478</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>2014-02-08 00:00:00</td>\n",
       "      <td>120</td>\n",
       "      <td>9.1</td>\n",
       "      <td>日本</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>201</th>\n",
       "      <td>2009</td>\n",
       "      <td>日本</td>\n",
       "      <td>2001夜物</td>\n",
       "      <td>84</td>\n",
       "      <td>剧情/动画</td>\n",
       "      <td>2009-10-02 00:00:00</td>\n",
       "      <td>80</td>\n",
       "      <td>6.6</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202</th>\n",
       "      <td>2009</td>\n",
       "      <td>中国香港</td>\n",
       "      <td>头七 頭</td>\n",
       "      <td>7039</td>\n",
       "      <td>恐怖</td>\n",
       "      <td>2009-05-21 00:00:00</td>\n",
       "      <td>60</td>\n",
       "      <td>6.2</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>203</th>\n",
       "      <td>1896</td>\n",
       "      <td>法国</td>\n",
       "      <td>火车进站 L</td>\n",
       "      <td>7001</td>\n",
       "      <td>纪录片/短片</td>\n",
       "      <td>1896-01-06</td>\n",
       "      <td>60</td>\n",
       "      <td>8.8</td>\n",
       "      <td>法国</td>\n",
       "      <td>B</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>2009</td>\n",
       "      <td>美国</td>\n",
       "      <td>银行舞蹈</td>\n",
       "      <td>6944</td>\n",
       "      <td>短片</td>\n",
       "      <td>1905-07-01 00:00:00</td>\n",
       "      <td>60</td>\n",
       "      <td>7.8</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>205</th>\n",
       "      <td>2003</td>\n",
       "      <td>荷兰</td>\n",
       "      <td>2003提雅</td>\n",
       "      <td>48</td>\n",
       "      <td>音乐</td>\n",
       "      <td>2003-10-07 00:00:00</td>\n",
       "      <td>200</td>\n",
       "      <td>8.9</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>206</th>\n",
       "      <td>2012</td>\n",
       "      <td>美国</td>\n",
       "      <td>死亡飞车3：地狱烈</td>\n",
       "      <td>6937</td>\n",
       "      <td>动作</td>\n",
       "      <td>2012-12-12 00:00:00</td>\n",
       "      <td>60</td>\n",
       "      <td>5.8</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>207</th>\n",
       "      <td>2012</td>\n",
       "      <td>日本</td>\n",
       "      <td>时光钟摆 振り</td>\n",
       "      <td>6876</td>\n",
       "      <td>剧情/动画/短片</td>\n",
       "      <td>2012-03-20 00:00:00</td>\n",
       "      <td>60</td>\n",
       "      <td>8.7</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>208</th>\n",
       "      <td>2011</td>\n",
       "      <td>中国香港</td>\n",
       "      <td>你还可爱么 你還可愛</td>\n",
       "      <td>6805</td>\n",
       "      <td>短片</td>\n",
       "      <td>2011-04-22 00:00:00</td>\n",
       "      <td>60</td>\n",
       "      <td>8.3</td>\n",
       "      <td>美国</td>\n",
       "      <td>B</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>209</th>\n",
       "      <td>2002</td>\n",
       "      <td>中国香港</td>\n",
       "      <td>一碌蔗</td>\n",
       "      <td>6799</td>\n",
       "      <td>剧情/喜剧/爱情</td>\n",
       "      <td>2002-09-19 00:00:00</td>\n",
       "      <td>60</td>\n",
       "      <td>6.7</td>\n",
       "      <td>美国</td>\n",
       "      <td>C</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       年代    产地                                     名字    投票人数  \\\n",
       "0    1994    美国                                 肖申克的救赎  692795   \n",
       "1    1957    美国                                   控方证人   42995   \n",
       "2    1997   意大利                                  美丽人生   327855   \n",
       "3    1994    美国                                   阿甘正传  580897   \n",
       "4    1993  中国大陆                                   霸王别姬  478523   \n",
       "5    2012    美国                                 泰坦尼克号   157074   \n",
       "6    1993    美国                                 辛德勒的名单  306904   \n",
       "7    1997    日本  新世纪福音战士剧场版：Air/真心为你 新世紀エヴァンゲリオン劇場版 Ai   24355   \n",
       "8    2013    日本         银魂完结篇：直到永远的万事屋 劇場版 銀魂 完結篇 万事屋よ   21513   \n",
       "9    1994    法国                               这个杀手不太冷   662552   \n",
       "100  1993    韩国                                    101     146   \n",
       "101  1995    英国                                     10     186   \n",
       "102  2013    韩国                                     素媛  114819   \n",
       "103  2003    美国                             101忠狗续集：伦敦     924   \n",
       "104  2000    美国                                     10    9514   \n",
       "105  2013    韩国                                     10     601   \n",
       "106  2006    美国                                   10件或    1770   \n",
       "107  2014    美国                                    10年    1531   \n",
       "108  2012    日本          11·25自决之日 三岛由纪夫与年轻人们 11・25自決の     149   \n",
       "109  1997    美国                                  泰坦尼克号  535491   \n",
       "200  2014    日本                         最完美的离婚 2014特别篇   18478   \n",
       "201  2009    日本                                 2001夜物      84   \n",
       "202  2009  中国香港                                   头七 頭    7039   \n",
       "203  1896    法国                                 火车进站 L    7001   \n",
       "204  2009    美国                                   银行舞蹈    6944   \n",
       "205  2003    荷兰                                 2003提雅      48   \n",
       "206  2012    美国                              死亡飞车3：地狱烈    6937   \n",
       "207  2012    日本                                时光钟摆 振り    6876   \n",
       "208  2011  中国香港                             你还可爱么 你還可愛    6805   \n",
       "209  2002  中国香港                                    一碌蔗    6799   \n",
       "\n",
       "                 类型                 上映时间   时长   评分    首映地点 评分等级 热门程度  \n",
       "0             剧情/犯罪  1994-09-10 00:00:00  142  9.6  多伦多电影节    A    A  \n",
       "1          剧情/悬疑/犯罪  1957-12-17 00:00:00  116  9.5      美国    A    A  \n",
       "2          剧情/喜剧/爱情  1997-12-20 00:00:00  116  9.5     意大利    A    A  \n",
       "3             剧情/爱情  1994-06-23 00:00:00  142  9.4   洛杉矶首映    A    A  \n",
       "4          剧情/爱情/同性  1993-01-01 00:00:00  171  9.4      香港    A    A  \n",
       "5          剧情/爱情/灾难  2012-04-10 00:00:00  194  9.4    中国大陆    A    A  \n",
       "6          剧情/历史/战争  1993-11-30 00:00:00  195  9.4   华盛顿首映    A    A  \n",
       "7    剧情/动作/科幻/动画/奇幻  1997-07-19 00:00:00   87  9.4      日本    A    A  \n",
       "8             剧情/动画  2013-07-06 00:00:00  110  9.4      日本    A    A  \n",
       "9          剧情/动作/犯罪  1994-09-14 00:00:00  133  9.4      法国    A    A  \n",
       "100           喜剧/爱情  1993-06-19 00:00:00  112  7.4      韩国    B    D  \n",
       "101              喜剧  1995-01-25 00:00:00  101  7.4      美国    B    D  \n",
       "102           剧情/家庭  2013-10-02 00:00:00  123  9.1      韩国    A    A  \n",
       "103        喜剧/动画/家庭  2003-01-21 00:00:00   70  7.5      美国    B    B  \n",
       "104           喜剧/家庭  2000-09-22 00:00:00  100  7.0      美国    C    A  \n",
       "105              剧情  2014-04-24 00:00:00   93  7.2      美国    B    C  \n",
       "106        剧情/喜剧/爱情  2006-12-01 00:00:00   82  7.7      美国    B    B  \n",
       "107           喜剧/同性  2015-06-02 00:00:00   90  6.9      美国    C    B  \n",
       "108              剧情  2012-06-02 00:00:00  119  5.6      日本    C    D  \n",
       "109        剧情/爱情/灾难  1998-04-03 00:00:00  194  9.1    中国大陆    A    A  \n",
       "200        剧情/喜剧/爱情  2014-02-08 00:00:00  120  9.1      日本    A    A  \n",
       "201           剧情/动画  2009-10-02 00:00:00   80  6.6      美国    C    D  \n",
       "202              恐怖  2009-05-21 00:00:00   60  6.2      美国    C    A  \n",
       "203          纪录片/短片           1896-01-06   60  8.8      法国    B    A  \n",
       "204              短片  1905-07-01 00:00:00   60  7.8      美国    B    A  \n",
       "205              音乐  2003-10-07 00:00:00  200  8.9      美国    B    E  \n",
       "206              动作  2012-12-12 00:00:00   60  5.8      美国    C    A  \n",
       "207        剧情/动画/短片  2012-03-20 00:00:00   60  8.7      美国    B    A  \n",
       "208              短片  2011-04-22 00:00:00   60  8.3      美国    B    A  \n",
       "209        剧情/喜剧/爱情  2002-09-19 00:00:00   60  6.7      美国    C    A  "
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = df[:10]\n",
    "df2 = df[100:110]\n",
    "df3 = df[200:210]\n",
    "dff = pd.concat([df1,df2,df3],axis = 0) #默认axis = 0，列拼接需要修改为1\n",
    "dff"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}